1 Raw data

data <- read.csv("/home/bambrozi/2_CORTISOL/Data/T4_Elora_Data_04_25_2024.csv")

# Replace "treated" with NA
data$T4Cortisol[data$T4Cortisol == "treated" | data$T4Cortisol == "Treated at T2" | data$T4Cortisol == "treated at T2"] <- NA
# Convert the column to numeric, coercing non-numeric values to NA
data$T4Cortisol <- as.numeric(as.character(data$T4Cortisol))
#Filtering only the lines with values
data <- data[!is.na(data$T4Cortisol),]
#creating new data file cleaned  
write.csv(data, "/home/bambrozi/2_CORTISOL/Data/data_clean.csv", row.names = F)

print(data)

2 Continuous Phenotype

# Summary Statistics
summary(data$T4Cortisol)
# Histogram
hist(data$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")
# Boxplot
boxplot(data$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")
# Density Plot
plot(density(data$T4Cortisol), main = "Density Plot of T4 Cortisol", xlab = "T4 Cortisol", ylab = "Density")
# Calculate the theoretical quantiles
qqnorm(data$T4Cortisol, main = "QQ Plot of T4Cortisol", xlim = c(min(qqnorm(data$T4Cortisol)$x), max(qqnorm(data$T4Cortisol)$x)), ylim = c(min(qqnorm(data$T4Cortisol)$y), max(qqnorm(data$T4Cortisol)$y) + 2 * IQR(qqnorm(data$T4Cortisol)$y)))
# Add the QQ line
qqline(data$T4Cortisol, col = "red")

Summary statistics My Image

Histogram My Image

Density My Image

Box_Plot My Image

qq_Plot My Image

Shapiro-Wilk normality test My Image

2.1 Categorical Phenotype

I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.

My Image

data$Categorical <- ifelse(data$T4Cortisol >= 956, "H", 
                           ifelse(data$T4Cortisol <= 190.8, "L", "M"))

table(data$Categorical)
library(ggplot2)

# Reorder the levels of the 'Categorical' column
data$Categorical <- factor(data$Categorical, levels = c("L", "M", "H"))

# Create the histogram with reordered categories
ggplot(data, aes(x = Categorical, fill = Categorical)) +
  geom_bar() +
  labs(title = "Histogram of T4Cortisol by Category",
       x = "Category",
       y = "Frequency") +
  theme_minimal()

# Create the histogram
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
  geom_histogram(binwidth = 50, color = "black", alpha = 0.7) + # Adjust binwidth as needed
  labs(title = "Histogram of T4Cortisol with Color by Category",
       x = "T4 Cortisol",
       y = "Frequency",
       fill = "Category") +
  scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
  theme_minimal()

# Create the density plot
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
  geom_density(alpha = 0.3) +
  labs(title = "Density Plot of T4Cortisol with Color by Category",
       x = "T4Cortisol",
       y = "Density",
       fill = "Category") +
  scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
  theme_minimal()

# Create a density plot
ggplot(data, aes(x = T4Cortisol)) +
  geom_density() +
  geom_vline(xintercept = c(956, 190.8), linetype = "dashed", color = "red") +
  labs(title = "Density Plot of T4Cortisol with Vertical Lines",
       x = "T4Cortisol",
       y = "Density") +
  theme_minimal()

The animals were sorted in these three categories >H = Hight >M = Medium >L = Low

My Image

The individuals frequency distribution in theese categories are shown in the plots below

My Image My Image My Image My Image

2.2 Removing “outliers”

Observing the previous plots I tried to remove the “outliers” phenotypes above 1250, but the outcome from Shapiro test is still indicating no normality of the data.

library(tidyverse)

data_no_out <- filter(data, T4Cortisol < 1250)

# Create QQ plot
qqnorm(data_no_out$T4Cortisol, main = "QQ Plot of T4Cortisol", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
qqline(data$T4Cortisol, col = "red")

boxplot(data_no_out$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")

hist(data_no_out$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")

shapiro.test(data$T4Cortisol)

My Image My Image My Image My Image

3 Genetic Correlation

To assess the correlation between Cortisol phenotypes and Genomic Estimated Breeding Values (GEBVs), we opt for a linear regression instead of a standard correlation test. This decision is driven by the non-normal distribution of our Cortisol phenotypes, which violates the assumptions required for traditional correlation tests.

Linear regression offers a robust alternative as it does not necessitate normality for the dependent variable. By regressing GEBVs over Cortisol, we can model the relationship between these variables. Our aim is to estimate the regression coefficient, which serves as our correlation estimate.

Due to the violation of normality assumptions for the dependent variable (Cortisol), traditional correlation tests may not provide reliable results, particularly in assessing the significance of the correlation. Therefore, alternative approaches, such as linear regression, are preferred as they do not require the same assumptions about the distribution of the dependent variable. By using linear regression, we can still assess the relationship between Cortisol and GEBVs while accommodating the non-normality of Cortisol phenotypes.

The regression model can be represented as follows: \[ y = \beta_0 + \beta_1 \times GEBV_{\text{Milk}} + \epsilon \]

Where:

This approach enables us to quantify the relationship between Cortisol and GEBVs, addressing the non-normality of Cortisol phenotypes while allowing for formal hypothesis testing of the correlation’s significance.

3.1 Data preparation

The first data I received from Lucas had only 135 animals out of 260 with values the other 125 had only NA I shown this to Lucas Lucas wrote to Alisson Lucas sent me the missing animals I merged this two files

rm(list = ls())

# Load the necessary library
library(dplyr)
library(tidyverse)

cortisol_260 <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")

#This is the first dataframe with information for 135 animals and 125 NA
GEBVs1 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora.csv")
#This is the second file with information for the 125 NA animals
GEBVs2 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/elora_missing_females_2404_06_11_2024.csv")
#This are de columns we can use because we know the meaning of the acronyms
GEBVs_to_use <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebv_names_lucas_06102024_BAG.csv")


sum(is.na(GEBVs1$MILK))
GEBVs1<- GEBVs1[which(is.na(GEBVs1[,"DHI_BARN_NAME"]) == F),]

sum(!is.na(GEBVs2$MILK))
GEBVs2<- GEBVs2[which(is.na(GEBVs2[,"DHI_BARN_NAME"]) == F),]

print(GEBVs1$DHI_BARN_NAME)
print(GEBVs2$DHI_BARN_NAME)

# Making the two dataframes with the same columns
# Remove elora_id and international_id from GEBVs1
GEBVs1 <- GEBVs1 %>% select(-elora_id, -international_id)

# Remove ANIMAL_ID from GEBVs2
GEBVs2 <- GEBVs2 %>% select(-ANIMAL_ID)

# Check if the two dataframes have the same columns
have_same_columns <- all(names(GEBVs1) == names(GEBVs2))

if (have_same_columns) {
  print("The dataframes have the same columns.")
} else {
  print("The dataframes do not have the same columns.")
}


# Check if the column names are in the same order
same_order <- identical(names(GEBVs1), names(GEBVs2))

if (same_order) {
  print("The columns are in the same order.")
} else {
  print("The columns are not in the same order.")
}

GEBVs_combined <- rbind(GEBVs1, GEBVs2)

# Sort the columns
sorted_cortisol_260 <- sort(cortisol_260$ID)
sorted_GEBVs_combined <- sort(GEBVs_combined$DHI_BARN_NAME)

# Check if the sorted columns have the same values
identical(sorted_cortisol_260, sorted_GEBVs_combined)

# Create a duplicate of the column 'DHI_BARN_NAME' and name it 'elora_id'
GEBVs_combined$elora_id <- GEBVs_combined$DHI_BARN_NAME

# Assuming GEBVs_combined is your data frame
GEBVs_combined <- GEBVs_combined %>%
  select(elora_id, DHI_BARN_NAME, everything())

write.csv(GEBVs_combined, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora_complete.csv")

# Merging the dataframe with Cortisol values, with the dataframe with GEBVs values
Merg_Cort_GEBVs <- merge(cortisol_260, GEBVs_combined, by.x = "ID", by.y = "elora_id")

write.csv(Merg_Cort_GEBVs, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/Merged_Cortisol_GEBVs.csv")

#Opening the file with the GEBVs columns to use
Columns_to_use <- readLines("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/traits_to_use.txt")

colnames(Merg_Cort_GEBVs)[405] <- "IDD"

data <- select(Merg_Cort_GEBVs, ID, T4Cortisol, BIRTH_YEAR, all_of(Columns_to_use))

# The data below has the the 55 GEBVs + Cortisol data + Birth Year
write.csv(data, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits.csv")

samp_date2 <- read.csv("/home/bambrozi/2_CORTISOL/Data/Elora animal_ids_kl_sampling_date.csv")

# Convert Sampling_date to Date using as.Date
samp_date$Sampling_date <- as.Date(samp_date$Sampling_date, format = "%m/%d/%Y")

table(samp_date$Sampling_date)

samp_date <- select(samp_date, Elora_id, Sampling_date)

# Check if data$ID and samp_dates$elora_id are identical in values and order
identical(data$ID, samp_date$Elora_id)

data_final <- merge(data, samp_date, by.x="ID", by.y="Elora_id")

data_final <- data_final %>%
  select(ID, T4Cortisol, BIRTH_YEAR, Sampling_date, everything())

# The data below has the the 55 GEBVs + Cortisol data + Birth Year + Sampling data
write.csv(data_final, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv")

ps. I double checked by hand the select and merge process against the original tables received and is everything ok.

3.2 Correlations - Linear Regression

We tested the minimum model, adding only Birth Year, but adding Birth Year and Sampling Date we got the best results.

3.2.1 Adding BIRTH_YEAR and SAMPLING DATE

The regression model added the BY and SAMPLING DATE is shown bellow:

\[ y = \beta_0 + \beta_1 \times GEBV_{\text{Trait}} + BIRTH\_YEAR + SAMPLING\_DATE + \epsilon \]

Where:

  • \(y\) represents Cortisol phenotypes.
  • \(GEBV_{\text{Trait}}\) denotes the GEBV for the specific trait (e.g., Milk Yield).
  • \(BIRTH\_YEAR\) is the birth year of the subjects, included as a factor.
  • \(SAMPLING\_DATE\) is the cortisol sampling date for the subjects, included as a factor.
  • \(\beta_0\) and \(\beta_1\) are the intercept and regression coefficient, respectively.
  • \(\epsilon\) represents the error term capturing unexplained variability.

The SAMPLING_DATE variable is also converted to a factor to account for the categorical nature of sampling date.

# Convert BIRTH_YEAR to a factor and rename
data_final$BIRTH_YEAR <- as.factor(data_final$BIRTH_YEAR)

# Convert Sampling_data to a factor and rename
data_final$Sampling_date <- as.factor(data_final$Sampling_date)

# Initialize a list to store the results
results_list <- list()

# Loop through columns 3 to ncol(data) for the GEBVs
for (i in 5:ncol(data_final)) {
  trait_name <- colnames(data_final)[i]
  
  # Fit the linear regression model with BIRTH_YEAR as an additional predictor
  model <- lm(data_final[[2]] ~ data_final[[i]] + data_final$BIRTH_YEAR + data_final$Sampling_date , data = data_final)
  
  # Summarize the model
  model_summary <- summary(model)
  
  # Extract the desired statistics
  multiple_r_squared <- model_summary$r.squared
  adjusted_r_squared <- model_summary$adj.r.squared
  f_statistic <- model_summary$fstatistic[1] # F-statistic value
  f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
  f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
  p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
  
  # Extract the coefficient and its p-value for the trait
  coef_summary <- coef(model_summary)
  trait_coef <- coef_summary[2, "Estimate"]  # Assumes the trait is the second predictor
  trait_p_value <- coef_summary[2, "Pr(>|t|)"]
  
  # Combine the statistics into a data frame
  result <- data.frame(
    Trait = trait_name,
    Multiple_R_Squared = multiple_r_squared,
    Adjusted_R_Squared = adjusted_r_squared,
    F_Statistic = f_statistic,
    P_Value = p_value,
    Coefficient = trait_coef,
    Coefficient_P_Value = trait_p_value
  )
  
  # Append the result to the results list
  results_list[[i - 2]] <- result
}

# Combine all results into a single data frame
results_df <- do.call(rbind, results_list)

# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY_SAMP/regression_summary_all_traits_BY_SampDt.csv", row.names = FALSE)

Summary statistics for all Traits’ GEBVs adding BIRTY_YEAR and SAMPLING_DATE

Trait Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value Coefficient Coefficient_P_Value
CO 0.3913617 0.3257757 5.967151 0 -11.1849879 0.0099861
BMR 0.3899307 0.3241904 5.931386 0 14.4612444 0.0135665
LP 0.3858330 0.3196512 5.829896 0 12.2619950 0.0330355
MILK 0.3850052 0.3187343 5.809559 0 0.0671160 0.0396652
PROT 0.3841834 0.3178238 5.789422 0 2.2166395 0.0476301
UT 0.3822859 0.3157218 5.743130 0 -12.1238483 0.0731558
CK 0.3801906 0.3134008 5.692345 0 12.7333709 0.1192726
HHE 0.3795579 0.3126999 5.677076 0 7.4003505 0.1388525
MSP 0.3793006 0.3124149 5.670876 0 -7.2388952 0.1478152
DA 0.3791391 0.3122360 5.666989 0 10.8037510 0.1537699
TU 0.3785722 0.3116080 5.653351 0 -5.2064639 0.1769405
MSL 0.3777098 0.3106526 5.632656 0 -8.6230634 0.2203509
BQ 0.3775224 0.3104450 5.628166 0 -7.7284891 0.2313766
IH 0.3772456 0.3101385 5.621542 0 -4.7354916 0.2488963
ST 0.3767980 0.3096426 5.610839 0 -9.9721644 0.2808121
LOC 0.3766890 0.3095218 5.608233 0 -7.2367609 0.2893509
FAT 0.3765584 0.3093772 5.605116 0 0.8520581 0.3000062
UD 0.3763515 0.3091480 5.600177 0 -9.3297651 0.3179533
CA 0.3757417 0.3084725 5.585641 0 6.0756898 0.3798799
MS 0.3755038 0.3082090 5.579979 0 -6.0375749 0.4085912
SCK 0.3754363 0.3081342 5.578372 0 -4.4161473 0.4173165
SCS 0.3754033 0.3080976 5.577587 0 3.9080681 0.4216769
IDD 0.3752677 0.3079474 5.574362 0 3.7029601 0.4403519
FOOT 0.3751830 0.3078536 5.572349 0 17.3309360 0.4526548
TL 0.3750803 0.3077398 5.569908 0 -6.6585204 0.4683140
MET 0.3749055 0.3075462 5.565755 0 -3.4014191 0.4970656
CTFS 0.3747600 0.3073850 5.562300 0 4.0557716 0.5233387
CONF 0.3746137 0.3072229 5.558828 0 -4.7896643 0.5523352
FE 0.3745078 0.3071056 5.556316 0 -2.9375586 0.5752627
HL 0.3743988 0.3069849 5.553731 0 3.4274416 0.6009102
FA 0.3742870 0.3068610 5.551080 0 -3.2821641 0.6298652
DD 0.3742836 0.3068573 5.551001 0 2.5402141 0.6307730
MOB 0.3742556 0.3068263 5.550337 0 3.0592146 0.6385442
FTP 0.3742359 0.3068045 5.549870 0 4.8583743 0.6441419
BCS 0.3741681 0.3067293 5.548263 0 2.6692352 0.6643615
HH 0.3740753 0.3066266 5.546066 0 2.0230249 0.6947785
DF 0.3740392 0.3065865 5.545209 0 2.3681806 0.7076996
FL 0.3739824 0.3065237 5.543865 0 -2.1711035 0.7294613
UF 0.3739605 0.3064994 5.543347 0 2.8880683 0.7384413
MDR 0.3738855 0.3064163 5.541571 0 1.8495373 0.7722558
WL 0.3738548 0.3063822 5.540843 0 1.2643028 0.7878801
AFS 0.3738293 0.3063540 5.540239 0 1.5814380 0.8018670
RUM 0.3738023 0.3063241 5.539601 0 -1.2574561 0.8179113
SH 0.3738015 0.3063233 5.539583 0 1.0342843 0.8184040
BD 0.3736958 0.3062061 5.537080 0 0.7613199 0.9071017
MR 0.3736957 0.3062060 5.537078 0 0.6291752 0.9071898
SU 0.3736871 0.3061965 5.536876 0 0.4745538 0.9186634
FEED 0.3736818 0.3061907 5.536751 0 0.3944746 0.9266753
DHL 0.3736774 0.3061858 5.536647 0 -0.5919776 0.9340753
DO 0.3736757 0.3061838 5.536604 0 0.5220723 0.9373261
DS 0.3736695 0.3061769 5.536458 0 0.3979132 0.9502668
CW 0.3736676 0.3061749 5.536414 0 0.3562368 0.9547808
ME 0.3736658 0.3061729 5.536370 0 -0.2401976 0.9599012
MT 0.3736595 0.3061659 5.536223 0 -0.0976386 0.9882642
DCA 0.3736593 0.3061657 5.536217 0 0.0802555 0.9906432
Fitting Birth_Year and Sampling_date to the model these are the traits with significant correlation (<0.15):
  • CO = Cystic ovaries
  • BMR = Body Maintenance Requirements
  • LP = Lactation persistency
  • MILK = Milk yield
  • PROT = Protein yield
  • UT = Udder Texture
  • CK = Clinical Ketosis
  • HHE = Heel Horn Erosion
  • MSP = Milking Speed

4 GENOTYPES

Lucas Alcântara sent me the path to the genotype and pedigree files: /data/cgil/daiclu/6_Data/database/public_output/bruno

My Image

In this folder we found the following files:

I made a copy of this files in a folder called Raw_files:

/home/bambrozi/2_CORTISOL/RawFiles

This directory has two sub-directories:

5 PHENOTYPE file

library(data.table)

pheno <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")
ped <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
geno <- fread("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
geno <- geno[,c("V2")]

#Bringing cdn_id to my phenotype file
#Generate a index with the match
matching_indices <- match(pheno$ID, ped$elora_id)
# Then, assign 'cdn_id' from 'ped' to 'pheno' where there are matches
pheno$cdn_id <- ifelse(!is.na(matching_indices), ped$cdn_id[matching_indices], NA)

#Making a phenotype file only with genotyped animals
pheno_genotyped <- pheno[pheno$cdn_id %in% geno$V2,] 

#check if all animals in this file are genotyped
checkk <- pheno_genotyped$cdn_id %in% geno$V2
sum(checkk)

#The phenotype file should have three columns: FID, Animal_id, Phenotype
HO <- rep("HO", 252)

pheno_gwas <- as.data.frame(cbind(HO, pheno_genotyped$cdn_id, pheno_genotyped$T4Cortisol))

colnames(pheno_gwas) <- c("FID", "cdn_id", "cortisol")

pheno_gwas$cdn_id <-  as.numeric(pheno_gwas$cdn_id)
pheno_gwas$cortisol <- round(as.numeric(pheno_gwas$cortisol),2)

write.table(pheno_gwas, "/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", quote = F, row.names = F, col.names = T)

6 SNP MAP

Adjusting the SNP_map to .map

map <- fread("/data/cgil/daiclu/6_Data/database/public_output/bruno/DGVsnpinfo.2404.ho")
morgan <- data.frame(X0 = rep(0, 45101))
mapa=as.data.frame(cbind(map$chr, map$snp_name, morgan$X0, map$location))
head(mapa)
write.table(x = mapa, file = "/home/bambrozi/2_CORTISOL/Geno_files/genoplink.map", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)

6.1 Generating the bfiles

system("/home/local/bin/plink --cow --nonfounders --allow-no-sex --keep-allele-order --file /home/bambrozi/2_CORTISOL/Geno_files/genoplink --make-bed --out /home/bambrozi/2_CORTISOL/Geno_files/genoplink")
With the code above I generated the bfiles:
    genoplink.bed
  • genoplink.bim
  • genoplink.fam
  • genoplink.log
  • genoplink.nosex

7 Quality Control

We ran the code below to perfom the QC okok

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files/genoplink
RESULT=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean

/home/local/bin/plink \
    --bfile ${DATA} \
    --cow \
    --allow-no-sex \
    --hwe 1e-5 \
    --maf 0.01 \
    --geno 0.1 \
    --mind 0.1 \
    --keep-allele-order \
    --make-bed \
    --out ${RESULT}
    

The server screen outcome is shown below. My Image

After the Quality Control we ended up with

8 KING

To check for duplicated individuals I performed the KINSHIP analysis using one script from Larissa Braga. Running the King Analysis on Plink. okok

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/Geno_files_after_KING/result_king

plink2 \
    --bfile ${DATA} \
    --cow \
    --make-king-table \
    --out ${RESULT}

This is the output screen on terminal:

My Image

The table below is the output /home/bambrozi/2_CORTISOL/Geno_files_after_KING/result_king.kin0 and have pairwise comparisons between all individuals.

My Image

Now we should open in R and check for individuals with more than 0,354, to perform this we can use the code below, also provided by Larissa Braga: okok

setwd("/home/bambrozi/2_CORTISOL/Geno_files_after_KING")

relatedness="result_king.kin0" ## change accordingly!!

library(data.table)

print(relatedness)
rel=fread(relatedness, h = T)
head(rel)

print("Individuals with different identifications above the cut off of 0.354:")
dup=subset(rel, KINSHIP >= 0.354  & IID1!=IID2)
print(dup)
nrow(dup)

So the code above will provide this output if there is no duplicated individual.

My Image

We do not have any duplicated individual

So the file to be used are those in the directory /home/bambrozi/2_CORTISOL/Geno_files_after_QC

files:genoplink_clean

After Quality Control we didn’t lost any animal, so we don’t need to update our phenotype file

9 PCA

Now before performing the PCA analysis we need to change the FID for those individuals that has phenotype = 1 for Nadia.

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/PCA/pca_result

plink \
    --bfile ${DATA} \
    --keep-allele-order \
    --cow \
    --pca \
    --out ${RESULT}

The PCA output:

My Image

9.1 PCA Plot

After generate the Eigenvalues and Eigenvectors I used the code below to generate the PCA Plot

setwd("/home/bambrozi/2_CORTISOL/PCA")

library(ggplot2) 
library(tidyverse)

##
# Visualize PCA results
###

# read in result files
eigenValues <- read_delim("pca_result.eigenval", delim = " ", col_names = F)
eigenVectors <- read_delim("pca_result.eigenvec", delim = " ", col_names = F)

## Proportion of variation captured by each vector
eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)


# PCA plot
png("pca-plink.eng.png", width=5, height=5, units="in", res=300)
ggplot(data = eigenVectors) +
  geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 1, alpha = 1) +
  geom_hline(yintercept = 0, linetype="dotted") +
  geom_vline(xintercept = 0, linetype="dotted") +
  labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
       y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
  theme_minimal()
dev.off()


# PCA plot with animal ids
png("pca-plink.eng.animals_id.png", width=50, height=50, units="in", res=300)
ggplot(data = eigenVectors) +
  geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 5, alpha = 1) +
  geom_text(mapping = aes(x = X3, y = X4, label = X2), size = 2, hjust = 0, vjust = 0) +  # Add labels for animal IDs
  geom_hline(yintercept = 0, linetype="dotted") +
  geom_vline(xintercept = 0, linetype="dotted") +
  labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
       y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
  theme_minimal()
dev.off()

My Image

10 GWAS on GCTA

Previously we have performed GWAS on GCTA:

10.1 GWAS - EXTREME PHENO - WITH BY and SD

10.1.1 Data preparation

I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.

My Image

pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)


# Create an matrix with fixed effects with only those animals which also have phenotype and genotype
data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
fixeff$FID <- "HO"
fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]

# Now we are gona remove the intermediary animals from pheno object
pheno$Categorical <- ifelse(pheno$cortisol >= 956, "H", 
                           ifelse(pheno$cortisol <= 190.8, "L", "M"))
table(pheno$Categorical)
pheno <- pheno[pheno$Categorical != "M", ]
pheno <- pheno[, c("FID", "cdn_id", "cortisol")]

# Now we are going to remove from fixeff the animals which are not in pheno
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id,]

#Checking if match the animals and order
identical(fixeff$cdn_id, pheno$cdn_id)

#Creating a file with animals to keep in the genotype file, we will use it on Plink
to_keep_geno <- pheno[, c("FID", "cdn_id")]

write.table(fixeff, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt", quote = F, row.names = F, col.names = T)
write.table(pheno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt", quote = F, row.names = F, col.names = T)
write.table(to_keep_geno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt", quote = F, row.names = F, col.names = F)

We ended up with
H (Hight) = 34 animals
L (Low) = 37 animals Total = 71 animals

On Plink we will remove all individuals from genotype files that are classified as Medium, keeping only the High and Low

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
KEEP=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt

plink2 --bfile ${DATA} --keep ${KEEP} --chr-set 29 --make-bed --out ${RESULT}

10.1.2 GWAS on GCTA

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt

/home/local/bin/gcta \
    --bfile ${DATA} \
    --mlma-loco \
    --pheno ${PHENO} \
    --qcovar ${FIXEFF} \
    --autosome-num 29 \
    --autosome \
    --out ${RESULT}

After ran the GWAS above I got the following message from the GCTA:

Error: analysis stopped because more than half of the variance components are constrained. The result would be unreliable. You may have a try of the option –reml-no-constrain.

As we got this error message, we needed to solve this problem, and for that we used the whole sample size (252 individuals) to estimate the variance components, and after this, using this variance components from the whole sample we performed the ssGWAS with the subset of individuals (34 High + 37 Low = 71), but this was not possible in GCTA so we switched to another software (BLUPF90+)

11 GWAS BLUPF90+

To run ssGWAS on Blupf90+ suite, we will need 4 different softwares:

The tutorial for preGSF90 and postGSF90 we can find in the link bellow
https://nce.ads.uga.edu/wiki/doku.php?id=readme.pregsf90#gwas_options_postgsf90

According to the BLUPF90+ tutorial:

ssGWAS is an iterative procedure with 2 steps:
0) Quality control
1) prediction of GEBV with ssGBLUP
2) prediction of SNP marker effects based on the GEBV

11.1 Files preparation

Preparing files to run Variance components estimation using REML with AI (Average Information) algorithm.

First you need to create a directory in your home directory, prepare and save the following files in:

  • Phenotype and Fixed effects file
  • Pedigree file
  • Genotype file
  • BlupF90+ executable file
  • RenumF90 executable file
  • preGSf90 executable file
  • postGSf90 executable file
  • Parameter file

      11.1.1 Phenotype and Fixed effects file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID
      SECOND COLUMN = Phenotype
      THIRD COLUMN = Fixed Effect 1
      FOURTH COLUMN = Fixed Effect 2

      First we are going to generate a Phenotype_Fixed_Effect file with the whole sample (252 individuals) that we are going to use for the Variance Components Estimation.

      To get in one file these four columns we need the following code:

      okok

      pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
      data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
      ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)
      
      
      # Create data_final$cdn_id by matching IDs with elora_id
      data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
      fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
      fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
      fixeff$FID <- "HO"
      fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]
      
      identical(fixeff$cdn_id, pheno$cdn_id)
      
      fixeff <- fixeff[, c("cdn_id", "BIRTH_YEAR", "Sampling_date")]
      pheno <- pheno[,c("cdn_id", "cortisol")]
      
      # Load necessary libraries
      library(dplyr)
      
      # Merge pheno and fixeff data frames
      merged_data <- pheno %>% 
        left_join(fixeff, by = "cdn_id")
      
      
      merged_data$iid <- ids_eq$iid[match(merged_data$cdn_id, ids_eq$cdn_id)]
      
      merged_data <- merged_data[, c("iid", "cortisol", "BIRTH_YEAR", "Sampling_date")]
      
      write.table(merged_data, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt", col.names = F, quote = F, row.names = F)

      The file should be saved as text file, with separation by space and no columns names.

      PS: if there are any NA, they sould be replaced by 9999

      /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt (this file has 252 individuals)

      11.1.2 Pedigree file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID
      SECOND COLUMN = Sire ID
      THIRD COLUMN = Dam ID

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to remove the commas of a .csv file to a file with sepation by spaces.

      /home/bambrozi/2_CORTISOL/RawFiles/Pedigree okok

      # to replace comma for space in the .csv file with the equivalence among IDs
      # /home/bambrozi/2_CORTISOL/RawFiles/Pedigree
      sed 's/,/ /g' bruno_ped.iid.csv > bruno_ped_iid_blup.txt

      11.1.3 Genotype file

      First we are going to generate a Genotype file with the whole sample (252 individuals) that we are going to use for the Variance Components Estimation.

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Genotypes (0, 1 and 2 format)

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to replace the cid for iid. First we merge using the second column of the firs file, and the first column of the second file. Then we use again the command awk to keep only the third and fifth columsn and save in a different object.

      /home/bambrozi/2_CORTISOL/RawFiles/Genotypes

      # Using the awk function to merge the two files and the second awk to select only the 3rd and 5fh columns
      awk 'FNR==NR {a[$2]=$0; next} {print a[$1], $0}' bruno_ids.csv bruno_gntps.txt | awk '{print$3,$5}' > bruno_gntps_iid
      Below we can find the file’s location from the code above:
      • /home/bambrozi/2_CORTISOL/RawFiles/Genotypes/bruno_gntps.txt
      • /home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv
      • /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/bruno_gntps_iid (this file has 252 individuals)

      11.1.4 Download the executable files

      Download from this website https://nce.ads.uga.edu/html/projects/programs/Linux/64bit/:
      • BlupF90+ = we will use to estimate the Variance components and GEBVs
      • renumF90 = we will use to renumerate the files
      • preGSf90 = we will use to perform the Quality control
      • postGSf90 = we will use for GWAS

      11.1.5 SNP MAP

      /home/bambrozi/2_CORTISOL/RawFiles/SNP_map
      okok

      mapfile <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/DGVsnpinfo.2404.ho", header = T)
      
      colnames(mapfile)
      
      colnames(mapfile) <- c("SNP_ID", "CHR", "POS")
      
      mapfile <- mapfile[,c("CHR", "POS", "SNP_ID")]
      
      write.table(mapfile, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/snpmap.txt", col.names = T, row.names = F, quote = F)

11.2 Variance component estimation

But, before running the GEBV we will first perform one additional step to “CLEAN” our genotypes. Actually BLUPF90 by default perform a data cleaning with pre set parameters, but we will change some default parameters an so perform this additional step.

The Parameter card for this step is the parameter bellow:

/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renum_QC.par

okok


DATAFILE
fenofix.txt
TRAITS
2
FIELDS_PASSED TO OUTPUT

WEIGHT(S)

RESIDUAL_VARIANCE
1.0
EFFECT
3 cross numer
EFFECT
4 cross alpha
EFFECT
1 cross alpha
RANDOM
animal
FILE
bruno_ped_iid_blup.txt
FILE_POS
1 2 3 0 0
SNP_FILE
bruno_gntps_iid
PED_DEPTH
0
(CO)VARIANCES
1.0
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
  • DATAFILE: bellow this line you need to inform the name of the file with phenotype and fixed effects. As before running BLUPF90 on server you are going to direct the terminal to the directory where all these files are placed you only need to inform the name.
  • TRAITS: below this line you need to inform which column are the phenotype date in the previous file, in this example, 2.
  • FIELDS_PASSED TO OUTPUT:
  • WEIGHT (S):
  • RESIDUAL VARIANCE: for the firs run you need to inform the value of 1.0, for the second you can pick the variance from the firs run’s output.
  • EFFECT: you will inform your first effect, in this example, Birth Year, which is in the column 3, and the word cross numer because is a number.
  • EFFECT: you should provide the next effect, in this example, sample date, as sample date has one non numeric character you should inform as cross alpha, in this example column 4.
  • EFFECT: now I’m providing my animal ID information, in this example column 1, and again cross alpha because has number and letters in the ID. I’m also informing that this effect is RANDOM, and that is my animal effect.
  • FILE: bellow this line I need to provide the pedigree file. Again, as I’m already in the directory which contain the pedigree file I only need to provide the file name.
  • FILE-POS: Here I’ll inform which columns should be considered in the pedigree file, in this situation, 1 2 3 0 0.
  • PED_DEPTH: Now we can inform the depth we want the software considers the pedigree, or if we leave 0 it will the maximum possible.
  • (CO) VARIANCES: Here you should provide the Variance/Co-variance matrix, like as for residual variance in the first run we set up to 0 in this example that we don´t have to imagine any co-variance, but if you know that exist variance among you effects you shoul set up XXX for ….
  • OPTION outcallrate: Save the call rate information on SNP markers in the file.
  • OPTION saveCleanSNPs: This option generates 4 new files. We assume snpfile as a marker file.
    • snpfile_clean = new SNP marker file.
    • snpfile_clean_XrefID = new cross-reference file.
    • snpfile_SNPs_removed = a list of removed markers.
    • snpfile_Animals_removed = a list of removed animals.
  • OPTION minfreq 0.01: Minimum allele frequency to retain the marker.
  • OPTION map_file snpmap.txt: This option will upload the SNP MAP
  • OPTION excludeCHR 30 31: This option will remove sexual chromosome that is the 30 and 31

To run any softwere from Blupf90 suit we will perform always in this way:

  1. Go to the server you wanna run this analysis, for instance, grand
ssh grand
  1. Now go to the directory you have created to run this analysis where that set of files are placed.
cd /home/bambrozi/2_CORTISOL/GWAS/BLUPF90
  1. Make the renumF90 and BlupF90+ files executables
chmod +x renumf90
chmod +x blupf90+
  1. Run renumF90
./renumf90

When you run the code above, it will ask you the name of your parameter card, for this step is renum_QC.par.

The command above will generate couple files, among them renf90.par

We modified renf90.par in:
  • renf90_DataClean.par
  • renf90_VarCompEst.par

11.2.1 Quality control

The parameter card to perform the Quality Control is: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renf90_DataClean.par

It will run using the software pre preGS90 to generate the Clean Genotype and SNP_MAP files after Quality Control. okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   1.0000    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   1.0000    
OPTION SNP_file bruno_gntps_iid
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31

11.2.2 VCE

The second parameter card used for Variance Components Estimation (VCE) is the following: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renf90_VarCompEst.par

It will be run using the software pre blupf90+ to generate the VCE. okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   1.0000    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   1.0000    
OPTION SNP_file bruno_gntps_iid_clean
OPTION no_quality_control
OPTION method VCE
OPTION origID
OPTION missing 9999
OPTION se_covar_function H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
  • OPTION SNP_file bruno_gntps_iid_clean: we are going to inform the genotype file generated in the previous step (the Quality Control). Blup will create an file with the same name that the original genotype file, and add the sufix **_clean**
  • OPTION no_quality_control we need to set up this option because we performed Quality Control in the previous step and now we don’t need that Blupf90+ perform again. Blupf90+ by default perform quality control, so if we do not want, we need to specify.
  • OPTION method: VCE (Variance Component Estimation).
  • OPTION OrigID: this will keep the original ID informed.
  • OPTION missing 9999: you are informing that missing values will appear as 9999
  • OPTION se_covar_function: H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
    • H2_1: the name that your function will appear on the output files.
    • g_3_3: you are asking for genetic variance estimation for the 3rd informed effect.
    • **_1_1**: this effect is in the 1st column.
    • /(g_3_3_1_1+r_1_1): to get the total phenotipic variance, you are summing to genetic variance the residual variance of the effect in column 1.

In the parameter card above, we remove the option for Quality Control and added options for Variance Components Estimation, for Missing data, for origID and for heritability estimation, but the MOST IMPORTANT PART is we need to change the OPTION SNP_FILE, replacing the original genotype file, for the “clean” version generated in the previous step.

The Variance Components will be placed in the file: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/blupf90.log

blupf90.log
My Image

Now you should update you renf90_VarCompEst.par file with these informations from the .log file

Copy Residual Variance from blupf90.log and will paste on renf90_VarCompEst.par RANDOM_RESIDUAL_VALUES Copy Genetic variance for effect x from blupf90.log and will paste on renf90_VarCompEst.par (CO) VARIANCE

run blupf90+ again with the updated renf90_VarCompEst.par

If the Residual Variance and Genetic variance for effect x didn’t change in your blupf90.log the analysis ended, but if this value vary, you should update again the renf90.par and run again blupf90+ until this values don’t change more.

blupf90.log
My Image

Parameter card
My Image

Now that we have the Variance components we go for the next step:
  • Prediction of SNP marker effects based on the GEBV
  • GWAS for High and Low cortisol animals
To do this, I’ll:
  • Keep the Phenotype_FixEff file from the previous analysis with records for 252 individuals
  • generate the genotype file like bellow
  • Perform the QUALITY CONTROL for the new (71 samples) genotype file
  • Add the VCE from the 252 data set in the parameter card
  • Generate the GEBV
  • Run GWAS

11.3 Prediction of SNP marker effects based on the GEBV

11.3.0.1 GWAS for High and Low cortisol animals

Now we’ll run a new analysis using the Variance Components Estimation from the previous step to perform the GWAS.

To perform this first we need a a Genotype file only with the 71 animals (High=34 and Low=37)

11.3.0.1.1 Updating the files
11.3.0.1.1.1 Phenotype and Fixed Effects files

In this read_me file we performed a ssGWAS but keeping the phenotype and fixed effect information for all 252 individuals, only reducing the number of individuals for the Genotype file..

BUT we are going to used the code bellow to remove individuals with MEDIUM cortisol Phenotype just to use as base to get the genotype file in the next step.

phenofix <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt")

# Now we are gona remove the intermediary animals from pheno object
phenofix$Categorical <- ifelse(phenofix$V2 >= 956, "H", 
                               ifelse(phenofix$V2 <= 190.8, "L", "M"))
table(phenofix$Categorical)

phenofix <- phenofix[phenofix$Categorical != "M", ]

phenofix <- phenofix[, c("V1", "V2", "V3", "V4")]

write.table(phenofix, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/fenofix.txt", col.names = F, row.names = F, quote = F)
11.3.0.1.1.2 Genotype files

I used this command line bellow to remove the individuals with MEDIUM phenotypes. okok

awk 'NR==FNR{ids[$1]; next} $1 in ids' /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/fenofix.txt /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/bruno_gntps_iid > /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/bruno_gntps_iid_71

11.3.0.2 Running renum_QC.par

Run with renumf90

/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/renum_QC.par

okok

DATAFILE
fenofix.txt
TRAITS
2
FIELDS_PASSED TO OUTPUT

WEIGHT(S)

RESIDUAL_VARIANCE
77182
EFFECT
3 cross numer
EFFECT
4 cross alpha
EFFECT
1 cross alpha
RANDOM
animal
FILE
bruno_ped_iid_blup.txt
FILE_POS
1 2 3 0 0
SNP_FILE
bruno_gntps_iid_71
PED_DEPTH
0
(CO)VARIANCES
28212
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
We modified renf90.par in 3 copies:
  • renf90_DataClean.par
  • renf90_ssGWAS1_Ginv.par
  • renf90_ssGWAS2_SNPeff.par

11.3.0.3 Running renf90_DataClean.par for Quality Control

To run the parameter card bellow we are going to use the software presGSf90 /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/renf90_DataClean.par

okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31

11.3.0.4 Running renf90_ssGWAS1_Ginv.par for Ginv estimation

May be necessary to run the command bellow on the server Setting the stack size to “unlimited” allows the program to allocate memory for these large structures without hitting stack limits. By removing stack size limits, BLUPF90 is less likely to encounter segmentation faults or memory allocation issues that arise when the stack space is insufficient for the computations needed.

ulimit -s unlimited

The parameter card bellow we are going to run using the software blupf90+: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/renf90_ssGWAS1_Ginv.par

okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION no_quality_control
OPTION origID
OPTION missing 9999
OPTION saveGInverse
OPTION saveA22Inverse
OPTION snp_p_value

11.3.0.5 Running renf90_ssGWAS2_SNPeff.par for GWAS

The parameter card bellow we are going to run using the software postGSf90: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/renf90_ssGWAS2_SNPeff.par

okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION origID
OPTION no_quality_control
OPTION readGInverse
OPTION readA22Inverse
OPTION map_file snpmap.txt_clean
OPTION snp_p_value
OPTION Manhattan_plot_R
OPTION Manhattan_plot
This code will generate several files, among them:
  • chrsnp_pval:
    • Column 1: trait
    • Column 2: effect
    • Column 3: -log10(p-value)
    • Column 4: SNP
    • Column 5: Chromosome
    • Column 6: Position in bp
  • Pft1e3.R: a r-code to generate the Manhattan plot in R using the chrsnp_pval

11.3.0.6 Manhattan Plots for BLUPF90 GWAS

11.3.0.6.1 Genome Independent Segment

To make the Manhattan Plot considering Genome Independent Segment we should run the code bellow. This code has part of the code in the file Pft1e3.R

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


# Calculate Bonferroni threshold (already done)
bonf <- -log10(0.05 / Me)


setwd("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno")
# Read in and process data for Manhattan plot
yyy1 <- read.table("chrsnp_pval")
yyy <- yyy1[order(yyy1$V4), ]
zzz <- yyy[which(yyy$V1 == 1 & yyy$V2 == 3), ]
n <- nrow(zzz)
y <- zzz[, 4]
x <- zzz[, 3]
chr1 <- zzz[, 5]
chr <- NULL
pos <- NULL

for (i in unique(yyy$V5)) {
  zz <- yyy[yyy$V5 == i, ]
  key <- zz$V4
  medio <- round(nrow(zz) / 2, 0)
  z <- key[medio]
  pos <- c(pos, z)
}

chrn <- unique(yyy$V5)
one <- which(chr1 %% 4 == 0)
two <- which(chr1 %% 4 == 1)
three <- which(chr1 %% 4 == 2)
four <- which(chr1 %% 4 == 3)
chr[one] <- "darkgoldenrod"
chr[two] <- "darkorchid"
chr[three] <- "blue"
chr[four] <- "forestgreen"

# Create Manhattan plot with Bonferroni line and legend
png(file = "Pft1e3_manplot_with_bonf_ind_seg.png", width = 20000, height = 10000, res = 600)
par(mfrow = c(1, 1), family = "sans", cex = 1.5, font = 2)
plot(y, x, xaxt = "n", main = "Manhattan Plot SNP p_value - Trait: 1 Effect: 3", xlab = "", ylab = "-log10(p-value)", pch = 20, xlim = c(1, n), ylim = c(0, max(x)), col = chr)

# Add Bonferroni line
abline(h = bonf, col = "red", lwd = 2, lty = 2)

# Add legend for Bonferroni line
legend("topright", legend = "Bonferroni correction for genome independent segments", col = "red", lwd = 2, lty = 2, cex = 1)

axis(1, at = pos, labels = chrn)
dev.off()

My Image

We will use the file /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/chrsnp_pval to get the p-values for the SNPs in the windows (that explain more than 0.5% of additive genetic variance)

11.3.0.7 qq-Plot

setwd("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno")


gwas<- read.table("chrsnp_pval", h=F)

colnames(gwas) <- c("trait", "effect", "-log10(p-value)", "SNP", "Chromosome", "Position in bp")

gwas$p_value <- 10^(-gwas$`-log10(p-value)`)

ps<- gwas$p_value

inflation <- function(ps) {
  chisq <- qchisq(1 - ps, 1)
  lambda <- median(chisq) / qchisq(0.5, 1)
  lambda
}
# Calculating the lambda -  the lambda statistic should be close to 1 if
#the points fall within the expected range, or greater than one if 
# the observed p-values are more significant than expected.
inflation(ps)

gg_qqplot <- function(ps, ci = 0.95) {
  n  <- length(ps)
  df <- data.frame(
    observed = -log10(sort(ps)),
    expected = -log10(ppoints(n)),
    clower   = -log10(qbeta(p = (1 - ci) / 2, shape1 = 1:n, shape2 = n:1)),
    cupper   = -log10(qbeta(p = (1 + ci) / 2, shape1 = 1:n, shape2 = n:1))
  )
  log10Pe <- expression(paste("Expected -log"[10], plain(P)))
  log10Po <- expression(paste("Observed -log"[10], plain(P)))
  ggplot(df) +
    geom_ribbon(
      mapping = aes(x = expected, ymin = clower, ymax = cupper),
      alpha = 0.1
    ) +
    geom_point(aes(expected, observed), shape = 1, size = 3) +
    geom_abline(intercept = 0, slope = 1, alpha = 0.5) +
    # geom_line(aes(expected, cupper), linetype = 2, size = 0.5) +
    # geom_line(aes(expected, clower), linetype = 2, size = 0.5) +
    xlab(log10Pe) +
    ylab(log10Po)
}

## plot -----
gg_qqplot(ps) +
  theme_bw(base_size = 24) +
  annotate(
    geom = "text",
    x = -Inf,
    y = Inf,
    hjust = -0.15,
    vjust = 1 + 0.15 * 3,
    label = sprintf("λ = %.2f", inflation(ps)),
    size = 8
  ) +
  theme(
    axis.ticks = element_line(size = 0.5),
    panel.grid = element_blank()
    # panel.grid = element_line(size = 0.5, color = "grey80")
  )

My Image

11.4 BLUPF90+ WINDOWS

To perform Window approach on Blupf90+ I copied all files in: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno to: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10 I did this because I’m not sure about which files the postGSf90 needs to run, so I duplicate the whole directory, and I will change only the file renf90_ssGWAS2_SNPeff.par name for renf90_ssGWAS2_SNPeff_W_10.par and change the options to perform the window approach.

11.4.1 Running renf90_ssGWAS2_SNPeff_W_10.par for GWAS (WINDOWS)

The parameter card bellow we are going to run using the software postGSf90:

okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION origID
OPTION no_quality_control
OPTION readGInverse
OPTION readA22Inverse
OPTION map_file snpmap.txt_clean
OPTION snp_p_value
OPTION Manhattan_plot_R
OPTION Manhattan_plot
OPTION SNP_moving_average 10
OPTION windows_variance 10 
The parameter file above will generate the following files:
  • snp_sol
    • column 1: trait
    • column 2: effect
    • column 3: SNP
    • column 4: Chromosome
    • column 5: Position
    • column 6: SNP solution
    • column 7: weight
    • column 8: variance explained by n adjacents SNP (if OPTION windows_variance is used).
    • column 9: variance of the SNP solution (used to compute the p-value) (if OPTION snp_p_value is used)
  • snp_pred
    • contains allele frequencies + SNP effects
  • chrsnpvar
    • column 1: trait
    • column 2: effect
    • column 3: variance explained by n adjacents SNP
      • If windows size is 20, the proportion of variance assigned to SNP 1 is calculated from SNP 1 to 20, for SNP 2 it goes from 2 to 21
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position
  • chrsnp_pval
    • column 1: trait
    • column 2: effect
    • column 3: -log10(p-value)
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position in bp
  • chrsnp
    • column 1: trait
    • column 2: effect
    • column 3: values of SNP effects to use in Manhattan plots → [abs(SNP_i)/SD(SNP)]
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position
  • windows_segments
    • column 1: label
    • column 2: window size (number of SNP)
    • column 3: Start SNP number for the window
    • column 4: End SNP number for the window
    • column 5: identification of window: (ChrNumber)’_’(startPositionMBP)
    • column 6: Start (ChrNumber)’_’(Position) for the window
    • column 7: End (ChrNumber)’_’(Position) for the window
  • windows_variance
    • column 1: trait
    • column 2: effect
    • column 3: Start SNP number or SNP name for the window
    • column 4: End SNP number or SNP name for the window
    • column 5: window size (number of SNP)
    • column 6: Start (ChrNumber)’_’(Position) for the window
    • column 7: End (ChrNumber)’_’(Position) for the window
    • column 8: identification of window: (ChrNumber)’_’(startPositionMBP)
    • column 9: variance explained by n adjacents SNP
  • Vft1e3.R
  • Sft1e3.R
  • Pft1e3.R

Bellow we can see the SNPs that explain more than 0.5% of Genetic Variance okok

w_var <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/chrsnpvar", header = F)
w_var <- filter(w_var, V3 > 0.5)
colnames(w_var) <- c("V1", "V2", "Var", "SNP", "CHR", "BP")
snp_map <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpmap.txt_clean", header = T)

# Fazer o merge baseado em duas condições: CHR e POS
merged_data <- merge(w_var, snp_map, by.x = c("CHR", "BP"), by.y = c("CHR", "POS"), all.x = TRUE)
w_var <- merged_data[,c("CHR", "BP", "Var", "SNP_ID")]

write.csv(w_var, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpID_w05.csv")

rsid <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/SNPchimp_result_1425506748.tsv", header = T)

merged <- merge(rsid, w_var, by.x ="SNP_name", by.y ="SNP_ID")

colnames(merged)

merged <- merged[,c("SNP_name", "rs", "CHR", "BP", "Var")]

colnames(merged) <- c("SNP_name", "rsID", "CHR", "BP", "Var")

write.csv(merged, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpID_w05_rsid_snpvar.csv")
X SNP_name rsID CHR BP Var
1 ARS-BFGL-BAC-28665 rs111010562 24 28487771 0.5601843
2 ARS-BFGL-BAC-35548 rs110100182 2 115665427 1.2237598
3 ARS-BFGL-BAC-7444 rs110491621 13 18558540 0.6040267
4 ARS-BFGL-NGS-103753 rs110842922 2 115821065 1.0965189
5 ARS-BFGL-NGS-107330 rs109766798 2 116018639 0.5887867
6 ARS-BFGL-NGS-25298 rs109868537 3 111772736 1.0283717
7 ARS-BFGL-NGS-30337 rs110485060 2 115730530 1.2853203
8 ARS-BFGL-NGS-34854 rs109738387 23 14185501 0.5312912
9 ARS-BFGL-NGS-37809 rs42751504 3 111751663 0.9351573
10 ARS-BFGL-NGS-43721 rs108974471 2 115986085 0.7857140
11 ARS-BFGL-NGS-44131 rs110100483 3 111806406 0.8576835
12 ARS-BFGL-NGS-57285 rs109872657 9 10112014 0.5060728
13 ARS-BFGL-NGS-58358 rs109507088 27 16692516 0.5575927
14 ARS-BFGL-NGS-6202 rs110385521 3 111833768 0.9820334
15 ARS-BFGL-NGS-97489 rs42111498 27 16546003 0.5266455
16 ARS-BFGL-NGS-97849 rs110553601 3 111965305 0.6104093
17 BTA-25900-no-rs rs41575397 13 18509812 0.6018006
18 BTA-49096-no-rs rs41578131 2 115695003 1.5221061
19 BTA-95363-no-rs rs41615088 16 6121692 0.5967890
20 BTB-00952622 rs42110734 27 16663290 0.5891105
21 BTB-01068898 rs42226640 6 8630173 0.5563913
22 BTB-01069210 rs42225053 6 9142299 0.5157691
23 BTB-01485274 rs42609685 24 28540641 0.7103497
24 BTB-01608944 rs42723390 15 10974997 0.5126189
25 BTB-01641394 rs42752353 3 111730561 0.8541772
26 BTB-01948148 rs43056622 3 111677167 0.6871511
27 Hapmap41888-BTA-49091 rs41645223 2 115622067 0.8040708
28 Hapmap42062-BTA-109789 rs41621207 3 111708236 1.0996405
29 Hapmap49833-BTA-103929 rs41603335 13 18386942 0.5085677
30 Hapmap50266-BTA-13664 rs29018622 13 18266073 0.5842979
31 Hapmap54541-rs29014049 rs29014049 8 55218811 0.6997957
32 Hapmap54770-rs29009608 rs29009608 2 115875702 0.7073805
33 Hapmap54981-rs29019846 rs29019846 24 28516684 0.6150000
34 Hapmap58887-rs29013502 rs29013502 24 28570245 0.5794518

11.4.1.1 Manhattan Plots for BLUPF90 Windows ssGWAS

okok

# Set working directory
setwd("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/")
getwd()

# Read and process data
yyy1 = read.table("chrsnpvar")
yyy  = yyy1[order(yyy1$V4),]
zzz  = yyy[ which(yyy$V1==1 & yyy$V2==3), ]
n    = nrow(zzz)
y    = zzz[,4]
x    = zzz[,3]
chr1 = zzz[,5]
chr  = NULL
pos  = NULL
for (i in unique(yyy$V5)) {
  zz     = yyy[yyy$V5==i,]
  key    = zz$V4 
  medio  = round(nrow(zz)/2,0)
  z      = key[medio]
  pos    = c(pos,z)
}

# Assign colors for chromosomes
chrn       = unique(yyy$V5)
one        = which(chr1%%4==0) 
two        = which(chr1%%4==1) 
three      = which(chr1%%4==2) 
four       = which(chr1%%4==3) 
chr[one]   = "darkgoldenrod"
chr[two]   = "darkorchid"
chr[three] = "blue"
chr[four]  = "forestgreen"

# Generate Manhattan plot and save to PNG
png(file = "Vft1e3_manplot_with_thresholds.png", 
    width = 20000, height = 10000, res = 600) # Configure width, height, and resolution

# Set plot parameters
par(mfrow = c(1, 1), family = "sans", cex = 1.5, font = 2, mar = c(5, 5, 4, 2))

# Create Manhattan plot
plot(y, x, xaxt = "n", main = "Manhattan Plot SNP Variance explained by 10 adjacents SNP window", 
     xlab = "", ylab = "% variance expl", pch = 20, 
     xlim = c(1, n), ylim = c(0, max(x)), col = chr, cex.axis = 1.2)

# Add dashed lines for thresholds
abline(h = 0.5, col = "red", lwd = 2, lty = 2)  # Red dashed line at 0.5

# Add legend for thresholds
legend("topright", legend = c("Threshold 0.5%"), 
       col = "red", lwd = 2, lty = 2, cex = 1)

# Add chromosome labels on the X-axis
axis(1, at = pos, labels = chrn, las = 1, cex.axis = 0.8)

# Close the graphics device
dev.off()

My Image

11.4.2 Finding p-values for each SNP in the window

To bring the p-value for each snp in the window we need to use files from different approachs:
  • chrsnpvar:

    /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/chrsnpvar

    • which contains the variance explained by each window. This file is from windows approach
  • chrsnp_pval:

    /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/chrsnp_pval

    • which contains the p-value for each snp. This file is not from windows approach. Is for the approach which considers each SNP individually (beside fit all SNPs at the same time)

okok

chrsnpvar <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/chrsnpvar", header = F)
chrsnpvar <- filter(chrsnpvar, V3 > 0.5)
colnames(chrsnpvar) <- c("V1", "V2", "Var", "SNP", "CHR", "BP")

# as the column 3: -log10(p-value) of this file doesn't say that this p-value is for the window, I'm assuming that this p-value is for individual SNP
chrsnp_pval <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/chrsnp_pval", header = F)
colnames(chrsnp_pval) <- c("trait", "effect", "-log10(p-value)", "SNP", "Chromosome", "Position in bp")

snp_map <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpmap.txt_clean", header = T)

# Bringing p-value for each snp in the window

# Create an empty row with the same column names
empty_row <- chrsnpvar[1, ]
empty_row[,] <- NA  # Set all values to NA

# Create a new dataframe with gaps
expanded_df <- do.call(rbind, lapply(1:nrow(chrsnpvar), function(i) {
  rbind(chrsnpvar[i, ], empty_row[rep(1, 9), ])
}))

# Reset row names
rownames(expanded_df) <- NULL

# Ensure the SNP column is numeric
expanded_df$SNP <- as.numeric(expanded_df$SNP)

# Loop through the dataframe and fill empty SNP values with sequential numbers
for (i in seq_len(nrow(expanded_df))) {
  if (is.na(expanded_df$SNP[i])) {
    expanded_df$SNP[i] <- expanded_df$SNP[i - 1] + 1
  }
}


# Assuming your data frame is called df
expanded_df$index <- 1:nrow(expanded_df)
merged_pvalue_var <- merge(chrsnp_pval, expanded_df, by = "SNP")
merged_pvalue_var <- arrange(merged_pvalue_var, index)

merged_pvalue_var$p_value <- 10^(-merged_pvalue_var$`-log10(p-value)`)

merged_pvalue_var <- merge(merged_pvalue_var, snp_map, by.x = c("Chromosome", "Position in bp"), by.y = c("CHR", "POS"), all.x = TRUE)

merged_pvalue_var <- arrange(merged_pvalue_var, index)

pvalue_var_allsnps <- merged_pvalue_var[, c("index", "Chromosome", "Position in bp",
                                            "SNP", "SNP_ID", "-log10(p-value)",
                                            "p_value", "Var")]
## file below double cheked by hand
write.csv(pvalue_var_allsnps, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/pval_for_each_snp_in_window.csv")

The p-value for each SNP in the window is bellow:

X index Chromosome Position.in.bp SNP SNP_ID X.log10.p.value. p_value Var
1 1 2 115622067 4582 Hapmap41888-BTA-49091 0.4075785 0.3912204 0.8040708
2 2 2 115665427 4583 ARS-BFGL-BAC-35548 2.1953425 0.0063776 NA
3 3 2 115695003 4584 BTA-49096-no-rs 0.0488671 0.8935789 NA
4 4 2 115730530 4585 ARS-BFGL-NGS-30337 2.5552443 0.0027846 NA
5 5 2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 NA
6 6 2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
7 7 2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
8 8 2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
9 9 2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
10 10 2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
11 11 2 115665427 4583 ARS-BFGL-BAC-35548 2.1953425 0.0063776 1.2237598
12 12 2 115695003 4584 BTA-49096-no-rs 0.0488671 0.8935789 NA
13 13 2 115730530 4585 ARS-BFGL-NGS-30337 2.5552443 0.0027846 NA
14 14 2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 NA
15 15 2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
16 16 2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
17 17 2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
18 18 2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
19 19 2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
20 20 2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
21 21 2 115695003 4584 BTA-49096-no-rs 0.0488671 0.8935789 1.5221061
22 22 2 115730530 4585 ARS-BFGL-NGS-30337 2.5552443 0.0027846 NA
23 23 2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 NA
24 24 2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
25 25 2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
26 26 2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
27 27 2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
28 28 2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
29 29 2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
30 30 2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
31 31 2 115730530 4585 ARS-BFGL-NGS-30337 2.5552443 0.0027846 1.2853203
32 32 2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 NA
33 33 2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
34 34 2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
35 35 2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
36 36 2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
37 37 2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
38 38 2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
39 39 2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
40 40 2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
41 41 2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 1.0965189
42 42 2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
43 43 2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
44 44 2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
45 45 2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
46 46 2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
47 47 2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
48 48 2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
49 49 2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
50 50 2 116340519 4595 Hapmap44637-BTA-17098 0.1117605 0.7731068 NA
51 51 2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 0.7073805
52 52 2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
53 53 2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
54 54 2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
55 55 2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
56 56 2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
57 57 2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
58 58 2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
59 59 2 116340519 4595 Hapmap44637-BTA-17098 0.1117605 0.7731068 NA
60 60 2 116395928 4596 ARS-BFGL-NGS-33237 0.4679789 0.3404248 NA
61 61 2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 0.7857140
62 62 2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
63 63 2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
64 64 2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
65 65 2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
66 66 2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
67 67 2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
68 68 2 116340519 4595 Hapmap44637-BTA-17098 0.1117605 0.7731068 NA
69 69 2 116395928 4596 ARS-BFGL-NGS-33237 0.4679789 0.3404248 NA
70 70 2 116423023 4597 BTA-95472-no-rs 0.9326654 0.1167709 NA
71 71 2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 0.5887867
72 72 2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
73 73 2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
74 74 2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
75 75 2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
76 76 2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
77 77 2 116340519 4595 Hapmap44637-BTA-17098 0.1117605 0.7731068 NA
78 78 2 116395928 4596 ARS-BFGL-NGS-33237 0.4679789 0.3404248 NA
79 79 2 116423023 4597 BTA-95472-no-rs 0.9326654 0.1167709 NA
80 80 2 116430317 4598 Hapmap54608-rs29020417 1.0790898 0.0833509 NA
81 81 3 111677167 6878 BTB-01948148 1.0946657 0.0804145 0.6871511
82 82 3 111708236 6879 Hapmap42062-BTA-109789 1.2400438 0.0575382 NA
83 83 3 111730561 6880 BTB-01641394 4.1432148 0.0000719 NA
84 84 3 111751663 6881 ARS-BFGL-NGS-37809 4.1432148 0.0000719 NA
85 85 3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 NA
86 86 3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
87 87 3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
88 88 3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
89 89 3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
90 90 3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
91 91 3 111708236 6879 Hapmap42062-BTA-109789 1.2400438 0.0575382 1.0996405
92 92 3 111730561 6880 BTB-01641394 4.1432148 0.0000719 NA
93 93 3 111751663 6881 ARS-BFGL-NGS-37809 4.1432148 0.0000719 NA
94 94 3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 NA
95 95 3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
96 96 3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
97 97 3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
98 98 3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
99 99 3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
100 100 3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
101 101 3 111730561 6880 BTB-01641394 4.1432148 0.0000719 0.8541772
102 102 3 111751663 6881 ARS-BFGL-NGS-37809 4.1432148 0.0000719 NA
103 103 3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 NA
104 104 3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
105 105 3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
106 106 3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
107 107 3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
108 108 3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
109 109 3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
110 110 3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
111 111 3 111751663 6881 ARS-BFGL-NGS-37809 4.1432148 0.0000719 0.9351573
112 112 3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 NA
113 113 3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
114 114 3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
115 115 3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
116 116 3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
117 117 3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
118 118 3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
119 119 3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
120 120 3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
121 121 3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 1.0283717
122 122 3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
123 123 3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
124 124 3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
125 125 3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
126 126 3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
127 127 3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
128 128 3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
129 129 3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
130 130 3 112257692 6891 ARS-BFGL-NGS-115246 0.3386287 0.4585338 NA
131 131 3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 0.8576835
132 132 3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
133 133 3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
134 134 3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
135 135 3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
136 136 3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
137 137 3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
138 138 3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
139 139 3 112257692 6891 ARS-BFGL-NGS-115246 0.3386287 0.4585338 NA
140 140 3 112313178 6892 ARS-BFGL-NGS-19581 0.8847032 0.1304058 NA
141 141 3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 0.9820334
142 142 3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
143 143 3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
144 144 3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
145 145 3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
146 146 3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
147 147 3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
148 148 3 112257692 6891 ARS-BFGL-NGS-115246 0.3386287 0.4585338 NA
149 149 3 112313178 6892 ARS-BFGL-NGS-19581 0.8847032 0.1304058 NA
150 150 3 112345942 6893 ARS-BFGL-NGS-55894 0.2919687 0.5105418 NA
151 151 3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 0.6104093
152 152 3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
153 153 3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
154 154 3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
155 155 3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
156 156 3 112257692 6891 ARS-BFGL-NGS-115246 0.3386287 0.4585338 NA
157 157 3 112313178 6892 ARS-BFGL-NGS-19581 0.8847032 0.1304058 NA
158 158 3 112345942 6893 ARS-BFGL-NGS-55894 0.2919687 0.5105418 NA
159 159 3 112427851 6894 ARS-BFGL-NGS-65128 0.1144700 0.7682985 NA
160 160 3 112662102 6895 BTA-117752-no-rs 1.4158696 0.0383822 NA
161 161 6 8630173 11046 BTB-01068898 3.4115993 0.0003876 0.5563913
162 162 6 8727520 11047 Hapmap33379-BTA-153882 0.2360505 0.5806969 NA
163 163 6 8794456 11048 Hapmap44182-BTA-104972 0.2803710 0.5243594 NA
164 164 6 8881200 11049 ARS-BFGL-NGS-54017 2.2115780 0.0061436 NA
165 165 6 9049173 11050 BTB-01645485 0.0063533 0.9854774 NA
166 166 6 9142299 11051 BTB-01069210 0.1114894 0.7735895 NA
167 167 6 9166519 11052 BTB-01069292 1.9594612 0.0109784 NA
168 168 6 9189316 11053 BTB-02016906 1.9594612 0.0109784 NA
169 169 6 9216946 11054 BTB-01718894 2.5236403 0.0029947 NA
170 170 6 9272547 11055 BTB-01718868 1.5012275 0.0315335 NA
171 171 6 9142299 11051 BTB-01069210 0.1114894 0.7735895 0.5157691
172 172 6 9166519 11052 BTB-01069292 1.9594612 0.0109784 NA
173 173 6 9189316 11053 BTB-02016906 1.9594612 0.0109784 NA
174 174 6 9216946 11054 BTB-01718894 2.5236403 0.0029947 NA
175 175 6 9272547 11055 BTB-01718868 1.5012275 0.0315335 NA
176 176 6 9387456 11056 BTB-01939106 0.0631806 0.8646083 NA
177 177 6 9418094 11057 BTB-01322180 0.2428379 0.5716920 NA
178 178 6 9453914 11058 BTB-01322092 0.5623756 0.2739204 NA
179 179 6 9483232 11059 BTA-10940-no-rs 0.7251282 0.1883093 NA
180 180 6 9504486 11060 BTB-01322000 0.7154267 0.1925632 NA
181 181 8 55218811 15704 Hapmap54541-rs29014049 1.2509697 0.0561087 0.6997957
182 182 8 55298755 15705 BTA-107975-no-rs 0.2434081 0.5709418 NA
183 183 8 55323946 15706 BTB-01616745 0.5026370 0.3143135 NA
184 184 8 55376512 15707 ARS-BFGL-NGS-76732 3.7920659 0.0001614 NA
185 185 8 55433879 15708 BTB-02017947 2.7518384 0.0017708 NA
186 186 8 55458002 15709 Hapmap51811-BTA-119735 1.8587790 0.0138427 NA
187 187 8 55493961 15710 BTB-01258879 3.0361970 0.0009200 NA
188 188 8 55517877 15711 ARS-BFGL-NGS-37680 1.5463493 0.0284217 NA
189 189 8 55552514 15712 ARS-BFGL-NGS-74002 2.6498886 0.0022393 NA
190 190 8 55603363 15713 ARS-BFGL-NGS-108258 3.0361970 0.0009200 NA
191 191 9 10112014 16913 ARS-BFGL-NGS-57285 1.7786476 0.0166476 0.5060728
192 192 9 10139748 16914 ARS-BFGL-NGS-37889 1.6229291 0.0238271 NA
193 193 9 10221793 16915 ARS-BFGL-NGS-93995 0.7484987 0.1784437 NA
194 194 9 10260399 16916 ARS-BFGL-NGS-15511 0.7484987 0.1784437 NA
195 195 9 10337737 16917 BTA-83229-no-rs 0.2705532 0.5363481 NA
196 196 9 10490301 16918 ARS-BFGL-NGS-110691 0.1495558 0.7086703 NA
197 197 9 10529588 16919 Hapmap36170-SCAFFOLD200128_32745 0.2984731 0.5029524 NA
198 198 9 10601391 16920 ARS-BFGL-NGS-10202 0.0295204 0.9342856 NA
199 199 9 10622057 16921 Hapmap38633-BTA-83140 0.0528145 0.8854937 NA
200 200 9 10724264 16922 ARS-BFGL-NGS-17513 0.2137440 0.6113022 NA
201 201 13 18266073 23616 Hapmap50266-BTA-13664 0.3810341 0.4158780 0.5842979
202 202 13 18386942 23617 Hapmap49833-BTA-103929 0.4177720 0.3821448 NA
203 203 13 18489670 23618 ARS-BFGL-NGS-21967 0.3601825 0.4363324 NA
204 204 13 18509812 23619 BTA-25900-no-rs 1.3093175 0.0490549 NA
205 205 13 18558540 23620 ARS-BFGL-BAC-7444 1.2573005 0.0552967 NA
206 206 13 18626070 23621 ARS-BFGL-NGS-116274 1.2573005 0.0552967 NA
207 207 13 18676544 23622 BTA-31832-no-rs 1.1165798 0.0764575 NA
208 208 13 18720587 23623 Hapmap43233-BTA-31838 0.7767939 0.1671884 NA
209 209 13 18755905 23624 BTA-10553-rs29016106 0.8913772 0.1284171 NA
210 210 13 18792716 23625 ARS-BFGL-NGS-72609 1.2362833 0.0580386 NA
211 211 13 18386942 23617 Hapmap49833-BTA-103929 0.4177720 0.3821448 0.5085677
212 212 13 18489670 23618 ARS-BFGL-NGS-21967 0.3601825 0.4363324 NA
213 213 13 18509812 23619 BTA-25900-no-rs 1.3093175 0.0490549 NA
214 214 13 18558540 23620 ARS-BFGL-BAC-7444 1.2573005 0.0552967 NA
215 215 13 18626070 23621 ARS-BFGL-NGS-116274 1.2573005 0.0552967 NA
216 216 13 18676544 23622 BTA-31832-no-rs 1.1165798 0.0764575 NA
217 217 13 18720587 23623 Hapmap43233-BTA-31838 0.7767939 0.1671884 NA
218 218 13 18755905 23624 BTA-10553-rs29016106 0.8913772 0.1284171 NA
219 219 13 18792716 23625 ARS-BFGL-NGS-72609 1.2362833 0.0580386 NA
220 220 13 18822522 23626 ARS-BFGL-NGS-13518 1.3218013 0.0476649 NA
221 221 13 18509812 23619 BTA-25900-no-rs 1.3093175 0.0490549 0.6018006
222 222 13 18558540 23620 ARS-BFGL-BAC-7444 1.2573005 0.0552967 NA
223 223 13 18626070 23621 ARS-BFGL-NGS-116274 1.2573005 0.0552967 NA
224 224 13 18676544 23622 BTA-31832-no-rs 1.1165798 0.0764575 NA
225 225 13 18720587 23623 Hapmap43233-BTA-31838 0.7767939 0.1671884 NA
226 226 13 18755905 23624 BTA-10553-rs29016106 0.8913772 0.1284171 NA
227 227 13 18792716 23625 ARS-BFGL-NGS-72609 1.2362833 0.0580386 NA
228 228 13 18822522 23626 ARS-BFGL-NGS-13518 1.3218013 0.0476649 NA
229 229 13 18870958 23627 ARS-BFGL-NGS-68568 0.3402232 0.4568534 NA
230 230 13 18892498 23628 ARS-BFGL-NGS-109707 1.8542890 0.0139866 NA
231 231 13 18558540 23620 ARS-BFGL-BAC-7444 1.2573005 0.0552967 0.6040267
232 232 13 18626070 23621 ARS-BFGL-NGS-116274 1.2573005 0.0552967 NA
233 233 13 18676544 23622 BTA-31832-no-rs 1.1165798 0.0764575 NA
234 234 13 18720587 23623 Hapmap43233-BTA-31838 0.7767939 0.1671884 NA
235 235 13 18755905 23624 BTA-10553-rs29016106 0.8913772 0.1284171 NA
236 236 13 18792716 23625 ARS-BFGL-NGS-72609 1.2362833 0.0580386 NA
237 237 13 18822522 23626 ARS-BFGL-NGS-13518 1.3218013 0.0476649 NA
238 238 13 18870958 23627 ARS-BFGL-NGS-68568 0.3402232 0.4568534 NA
239 239 13 18892498 23628 ARS-BFGL-NGS-109707 1.8542890 0.0139866 NA
240 240 13 18952584 23629 ARS-BFGL-NGS-116613 1.2880171 0.0515208 NA
241 241 15 10974997 26385 BTB-01608944 1.3520801 0.0444549 0.5126189
242 242 15 11100934 26386 BTA-91816-no-rs 0.4733557 0.3362360 NA
243 243 15 11144666 26387 BTB-01421844 2.0829468 0.0082614 NA
244 244 15 11185546 26388 BTB-01421892 0.4733557 0.3362360 NA
245 245 15 11207429 26389 BTB-01421934 1.0078060 0.0982187 NA
246 246 15 11236303 26390 BTB-01422008 1.0078060 0.0982187 NA
247 247 15 11310260 26391 BTA-91820-no-rs 1.0078060 0.0982187 NA
248 248 15 11451408 26392 Hapmap40037-BTA-100798 0.9018532 0.1253565 NA
249 249 15 11487557 26393 BTA-94770-no-rs 0.0029926 0.9931329 NA
250 250 15 11550646 26394 ARS-BFGL-NGS-27782 0.8776148 0.1325517 NA
251 251 16 6121692 27745 BTA-95363-no-rs 1.4433689 0.0360272 0.5967890
252 252 16 6308221 27746 BTB-01975868 3.5022880 0.0003146 NA
253 253 16 6329170 27747 ARS-BFGL-NGS-89740 2.8227273 0.0015041 NA
254 254 16 6421207 27748 BTB-01200020 1.6187843 0.0240556 NA
255 255 16 6446024 27749 BTB-01200008 1.4569926 0.0349146 NA
256 256 16 6529002 27750 Hapmap52413-rs29016330 0.7722977 0.1689283 NA
257 257 16 6549899 27751 ARS-BFGL-NGS-13802 1.4433689 0.0360272 NA
258 258 16 6614897 27752 BTA-109095-no-rs 2.1726638 0.0067195 NA
259 259 16 6656521 27753 Hapmap51387-BTA-109077 2.1726638 0.0067195 NA
260 260 16 6781530 27754 Hapmap33722-BTA-155362 1.4427417 0.0360793 NA
261 261 23 14185501 36197 ARS-BFGL-NGS-34854 1.6567406 0.0220424 0.5312912
262 262 23 14208471 36198 ARS-BFGL-NGS-100006 1.6567406 0.0220424 NA
263 263 23 14239531 36199 ARS-BFGL-NGS-29516 1.5126341 0.0307161 NA
264 264 23 14306746 36200 ARS-BFGL-NGS-78514 1.5126341 0.0307161 NA
265 265 23 14329178 36201 ARS-BFGL-NGS-113147 1.5217814 0.0300759 NA
266 266 23 14365537 36202 ARS-BFGL-NGS-105392 0.8997870 0.1259543 NA
267 267 23 14387143 36203 ARS-BFGL-NGS-59308 1.6397377 0.0229225 NA
268 268 23 14416017 36204 ARS-BFGL-NGS-8960 0.7575833 0.1747498 NA
269 269 23 14487014 36205 ARS-BFGL-NGS-115470 0.8312254 0.1474941 NA
270 270 23 14571804 36206 Hapmap30534-BTA-137081 2.5196142 0.0030226 NA
271 271 24 28487771 37343 ARS-BFGL-BAC-28665 0.5617269 0.2743298 0.5601843
272 272 24 28516684 37344 Hapmap54981-rs29019846 0.8746587 0.1334570 NA
273 273 24 28540641 37345 BTB-01485274 1.7567714 0.0175077 NA
274 274 24 28570245 37346 Hapmap58887-rs29013502 1.2778933 0.0527359 NA
275 275 24 28604672 37347 BTB-01646599 1.7567714 0.0175077 NA
276 276 24 28638911 37348 ARS-BFGL-NGS-41140 1.5164166 0.0304497 NA
277 277 24 28676366 37349 ARS-BFGL-NGS-35716 1.0514307 0.0888320 NA
278 278 24 28703207 37350 BTB-00884791 1.0514307 0.0888320 NA
279 279 24 28731501 37351 ARS-BFGL-NGS-118806 1.8097285 0.0154979 NA
280 280 24 28795202 37352 BTB-00883964 1.4618598 0.0345255 NA
281 281 24 28516684 37344 Hapmap54981-rs29019846 0.8746587 0.1334570 0.6150000
282 282 24 28540641 37345 BTB-01485274 1.7567714 0.0175077 NA
283 283 24 28570245 37346 Hapmap58887-rs29013502 1.2778933 0.0527359 NA
284 284 24 28604672 37347 BTB-01646599 1.7567714 0.0175077 NA
285 285 24 28638911 37348 ARS-BFGL-NGS-41140 1.5164166 0.0304497 NA
286 286 24 28676366 37349 ARS-BFGL-NGS-35716 1.0514307 0.0888320 NA
287 287 24 28703207 37350 BTB-00884791 1.0514307 0.0888320 NA
288 288 24 28731501 37351 ARS-BFGL-NGS-118806 1.8097285 0.0154979 NA
289 289 24 28795202 37352 BTB-00883964 1.4618598 0.0345255 NA
290 290 24 28850850 37353 Hapmap54558-rs29009598 0.3031365 0.4975807 NA
291 291 24 28540641 37345 BTB-01485274 1.7567714 0.0175077 0.7103497
292 292 24 28570245 37346 Hapmap58887-rs29013502 1.2778933 0.0527359 NA
293 293 24 28604672 37347 BTB-01646599 1.7567714 0.0175077 NA
294 294 24 28638911 37348 ARS-BFGL-NGS-41140 1.5164166 0.0304497 NA
295 295 24 28676366 37349 ARS-BFGL-NGS-35716 1.0514307 0.0888320 NA
296 296 24 28703207 37350 BTB-00884791 1.0514307 0.0888320 NA
297 297 24 28731501 37351 ARS-BFGL-NGS-118806 1.8097285 0.0154979 NA
298 298 24 28795202 37352 BTB-00883964 1.4618598 0.0345255 NA
299 299 24 28850850 37353 Hapmap54558-rs29009598 0.3031365 0.4975807 NA
300 300 24 28894159 37354 BTA-57730-no-rs 0.8767144 0.1328268 NA
301 301 24 28570245 37346 Hapmap58887-rs29013502 1.2778933 0.0527359 0.5794518
302 302 24 28604672 37347 BTB-01646599 1.7567714 0.0175077 NA
303 303 24 28638911 37348 ARS-BFGL-NGS-41140 1.5164166 0.0304497 NA
304 304 24 28676366 37349 ARS-BFGL-NGS-35716 1.0514307 0.0888320 NA
305 305 24 28703207 37350 BTB-00884791 1.0514307 0.0888320 NA
306 306 24 28731501 37351 ARS-BFGL-NGS-118806 1.8097285 0.0154979 NA
307 307 24 28795202 37352 BTB-00883964 1.4618598 0.0345255 NA
308 308 24 28850850 37353 Hapmap54558-rs29009598 0.3031365 0.4975807 NA
309 309 24 28894159 37354 BTA-57730-no-rs 0.8767144 0.1328268 NA
310 310 24 28963905 37355 BTA-93775-no-rs 0.0204343 0.9540380 NA
311 311 27 16546003 39915 ARS-BFGL-NGS-97489 2.1349421 0.0073292 0.5266455
312 312 27 16628800 39916 Hapmap4685-BTA-29671 2.9562453 0.0011060 NA
313 313 27 16663290 39917 BTB-00952622 4.3841922 0.0000413 NA
314 314 27 16692516 39918 ARS-BFGL-NGS-58358 4.3841922 0.0000413 NA
315 315 27 16749902 39919 ARS-BFGL-NGS-34807 2.2355529 0.0058136 NA
316 316 27 16778234 39920 ARS-BFGL-NGS-13660 1.2061604 0.0622070 NA
317 317 27 16796562 39921 UA-IFASA-3305 0.1932988 0.6407686 NA
318 318 27 16818014 39922 ARS-BFGL-NGS-38547 1.2061604 0.0622070 NA
319 319 27 16946270 39923 ARS-BFGL-NGS-45781 2.1459210 0.0071463 NA
320 320 27 17051893 39924 ARS-BFGL-NGS-13285 1.2554968 0.0555269 NA
321 321 27 16663290 39917 BTB-00952622 4.3841922 0.0000413 0.5891105
322 322 27 16692516 39918 ARS-BFGL-NGS-58358 4.3841922 0.0000413 NA
323 323 27 16749902 39919 ARS-BFGL-NGS-34807 2.2355529 0.0058136 NA
324 324 27 16778234 39920 ARS-BFGL-NGS-13660 1.2061604 0.0622070 NA
325 325 27 16796562 39921 UA-IFASA-3305 0.1932988 0.6407686 NA
326 326 27 16818014 39922 ARS-BFGL-NGS-38547 1.2061604 0.0622070 NA
327 327 27 16946270 39923 ARS-BFGL-NGS-45781 2.1459210 0.0071463 NA
328 328 27 17051893 39924 ARS-BFGL-NGS-13285 1.2554968 0.0555269 NA
329 329 27 17083748 39925 BTB-00958271 0.5040644 0.3132821 NA
330 330 27 17109301 39926 Hapmap25499-BTA-123322 0.2758655 0.5298275 NA
331 331 27 16692516 39918 ARS-BFGL-NGS-58358 4.3841922 0.0000413 0.5575927
332 332 27 16749902 39919 ARS-BFGL-NGS-34807 2.2355529 0.0058136 NA
333 333 27 16778234 39920 ARS-BFGL-NGS-13660 1.2061604 0.0622070 NA
334 334 27 16796562 39921 UA-IFASA-3305 0.1932988 0.6407686 NA
335 335 27 16818014 39922 ARS-BFGL-NGS-38547 1.2061604 0.0622070 NA
336 336 27 16946270 39923 ARS-BFGL-NGS-45781 2.1459210 0.0071463 NA
337 337 27 17051893 39924 ARS-BFGL-NGS-13285 1.2554968 0.0555269 NA
338 338 27 17083748 39925 BTB-00958271 0.5040644 0.3132821 NA
339 339 27 17109301 39926 Hapmap25499-BTA-123322 0.2758655 0.5298275 NA
340 340 27 17149375 39927 ARS-BFGL-NGS-55767 0.1452961 0.7156553 NA

Now I used the code bellow to find the smaller SNP p-value per window.

## file below double cheked by hand
win_allsnp_pval <- read.csv("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/pval_for_each_snp_in_window.csv")

library(dplyr)

win_allsnp_pval <- win_allsnp_pval %>%
  mutate(windows = rep(1:ceiling(n() / 10), each = 10, length.out = n()))


min_pval_per_window_entire_row <- win_allsnp_pval %>%
  group_by(windows) %>%
  slice_min(p_value, with_ties = FALSE) %>%
  ungroup()

## file below double cheked by hand
write.csv(min_pval_per_window_entire_row, 
          "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/smaller_snp_pval_per_window.csv")


gallo_imput <- min_pval_per_window_entire_row[, c("Chromosome", "Position.in.bp", "SNP_ID")]

library(dplyr)

gallo_imput_unique <- gallo_imput %>% distinct(SNP_ID, .keep_all = TRUE)

## file below double cheked by hand
write.csv(gallo_imput_unique, 
          "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/lowest_snp_pvalue_per_window/gallo_imput.csv") 

12 GALLO - 0.5% Windows - BLUPF90+

okok

gwas <- read.csv("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/gallo_imput.csv")
colnames(gwas)
colnames(gwas) <- c("X", "CHR", "BP", "SNP")


# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/lowest_snp_pvalue_per_window/out_genes.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/lowest_snp_pvalue_per_window/out_qtl.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("SNP", "CHR", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/lowest_snp_pvalue_per_window/out_qtl_clean.csv")


out_qtl_unique <- out_qtl[!duplicated(out_qtl$QTL_ID), ]

library(tidyverse)
out.qtl.clean.unique <- select(out_qtl_unique, c("SNP", "CHR", "QTL_type", "start_pos", "end_pos","QTL_ID"))

write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/lowest_snp_pvalue_per_window/out_qtl_clean_UNIQUE.csv")

The GALLO output are bellow:

For GENES

X.1 X CHR BP SNP chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 1 2 115986085 ARS-BFGL-NGS-43721 2 115948258 115951955 3698 + ENSBTAG00000021326 CCL20 protein_coding
2 1 2 115986085 ARS-BFGL-NGS-43721 2 115992779 116028253 35475 + ENSBTAG00000021327 DAW1 protein_coding
3 2 2 116018639 ARS-BFGL-NGS-107330 2 115992779 116028253 35475 + ENSBTAG00000021327 DAW1 protein_coding
4 3 3 111772736 ARS-BFGL-NGS-25298 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
5 4 3 111933069 ARS-BFGL-NGS-85333 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
6 5 3 112064027 ARS-BFGL-NGS-110683 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
7 4 3 111933069 ARS-BFGL-NGS-85333 3 111927003 111927727 725 - ENSBTAG00000000335 HMGB4 protein_coding
8 9 9 10112014 ARS-BFGL-NGS-57285 9 9970661 10070666 100006 - ENSBTAG00000020817 B3GAT2 protein_coding
9 10 13 18509812 BTA-25900-no-rs 13 18449975 18466359 16385 + ENSBTAG00000052242 NA lncRNA
10 10 13 18509812 BTA-25900-no-rs 13 18484975 19062784 577810 + ENSBTAG00000014991 PARD3 protein_coding
11 11 13 18822522 ARS-BFGL-NGS-13518 13 18484975 19062784 577810 + ENSBTAG00000014991 PARD3 protein_coding
12 12 13 18892498 ARS-BFGL-NGS-109707 13 18484975 19062784 577810 + ENSBTAG00000014991 PARD3 protein_coding
13 11 13 18822522 ARS-BFGL-NGS-13518 13 18847324 18847381 58 + ENSBTAG00000054243 bta-mir-2285aw miRNA
14 12 13 18892498 ARS-BFGL-NGS-109707 13 18847324 18847381 58 + ENSBTAG00000054243 bta-mir-2285aw miRNA
15 14 16 6308221 BTB-01975868 16 6199772 6333310 133539 - ENSBTAG00000023177 CFH protein_coding
16 16 24 28731501 ARS-BFGL-NGS-118806 24 28655047 28904115 249069 + ENSBTAG00000021190 CDH2 protein_coding
17 17 27 16663290 BTB-00952622 27 16673409 16675767 2359 - ENSBTAG00000050498 NA protein_coding
18 18 27 16692516 ARS-BFGL-NGS-58358 27 16673409 16675767 2359 - ENSBTAG00000050498 NA protein_coding

FOR QTLs

X SNP CHR QTL_type start_pos end_pos QTL_ID
1 ARS-BFGL-NGS-43721 2 Production 115986083 115986087 39013
3 ARS-BFGL-NGS-43721 2 Reproduction 115986083 115986087 39014
5 ARS-BFGL-NGS-43721 2 Exterior 115986083 115986087 39015
7 ARS-BFGL-NGS-43721 2 Exterior 115986083 115986087 39016
9 ARS-BFGL-NGS-43721 2 Milk 115986083 115986087 39017
11 ARS-BFGL-NGS-43721 2 Production 115986083 115986087 39018
13 ARS-BFGL-NGS-43721 2 Exterior 115986083 115986087 39019
15 ARS-BFGL-NGS-43721 2 Milk 115986083 115986087 39020
17 ARS-BFGL-NGS-43721 2 Production 115986083 115986087 39021
19 ARS-BFGL-NGS-43721 2 Production 115986083 115986087 39022
21 ARS-BFGL-NGS-43721 2 Milk 115986083 115986087 39023
23 ARS-BFGL-NGS-43721 2 Milk 115986083 115986087 39024
25 ARS-BFGL-NGS-43721 2 Exterior 115986083 115986087 39025
27 ARS-BFGL-NGS-43721 2 Exterior 115986083 115986087 39026
29 ARS-BFGL-NGS-43721 2 Reproduction 115986083 115986087 39027
31 ARS-BFGL-NGS-43721 2 Exterior 115986083 115986087 39028
33 BTB-01718894 6 Meat_and_Carcass 9223776 9223780 233635
34 BTA-25900-no-rs 13 Reproduction 18489668 18489672 46823
35 BTA-25900-no-rs 13 Exterior 18489668 18489672 46824
36 BTA-25900-no-rs 13 Exterior 18489668 18489672 46825
37 BTA-25900-no-rs 13 Milk 18489668 18489672 46826
38 BTA-25900-no-rs 13 Production 18489668 18489672 46827
39 BTA-25900-no-rs 13 Exterior 18489668 18489672 46828
40 BTA-25900-no-rs 13 Exterior 18489668 18489672 46829
41 BTA-25900-no-rs 13 Production 18489668 18489672 46830
42 BTA-25900-no-rs 13 Production 18489668 18489672 46831
43 BTA-25900-no-rs 13 Exterior 18489668 18489672 46832
44 BTA-25900-no-rs 13 Exterior 18489668 18489672 46833
45 BTA-25900-no-rs 13 Reproduction 18489668 18489672 46834
46 BTA-25900-no-rs 13 Reproduction 18489668 18489672 46835
47 BTA-25900-no-rs 13 Exterior 18489668 18489672 46836
48 BTA-25900-no-rs 13 Reproduction 18509810 18509814 46837
49 BTA-25900-no-rs 13 Reproduction 18509810 18509814 46838
50 BTA-25900-no-rs 13 Exterior 18509810 18509814 46839
51 BTA-25900-no-rs 13 Exterior 18509810 18509814 46840
52 BTA-25900-no-rs 13 Production 18509810 18509814 46841
53 BTA-25900-no-rs 13 Exterior 18509810 18509814 46842
54 BTA-25900-no-rs 13 Exterior 18509810 18509814 46843
55 BTA-25900-no-rs 13 Production 18509810 18509814 46844
56 BTA-25900-no-rs 13 Production 18509810 18509814 46845
57 BTA-25900-no-rs 13 Exterior 18509810 18509814 46846
58 BTA-25900-no-rs 13 Exterior 18509810 18509814 46847
59 BTA-25900-no-rs 13 Exterior 18509810 18509814 46848
60 BTA-25900-no-rs 13 Exterior 18509810 18509814 46849
61 BTA-25900-no-rs 13 Reproduction 18509810 18509814 46850
62 BTA-25900-no-rs 13 Health 18509810 18509814 46851
63 BTA-25900-no-rs 13 Reproduction 18509810 18509814 46852
64 BTA-25900-no-rs 13 Exterior 18509810 18509814 46853
65 BTA-25900-no-rs 13 Reproduction 18558538 18558542 46854
66 BTA-25900-no-rs 13 Exterior 18558538 18558542 46855
67 BTA-25900-no-rs 13 Exterior 18558538 18558542 46856
68 BTA-25900-no-rs 13 Milk 18558538 18558542 46857
69 BTA-25900-no-rs 13 Exterior 18558538 18558542 46858
70 BTA-25900-no-rs 13 Milk 18558538 18558542 46859
71 BTA-25900-no-rs 13 Production 18558538 18558542 46860
72 BTA-25900-no-rs 13 Production 18558538 18558542 46861
73 BTA-25900-no-rs 13 Milk 18558538 18558542 46862
74 BTA-25900-no-rs 13 Exterior 18558538 18558542 46863
75 BTA-25900-no-rs 13 Exterior 18558538 18558542 46864
76 BTA-25900-no-rs 13 Reproduction 18558538 18558542 46865
77 BTA-25900-no-rs 13 Reproduction 18558538 18558542 46866
78 BTA-25900-no-rs 13 Exterior 18558538 18558542 46867
79 BTA-25900-no-rs 13 Exterior 18558538 18558542 46868
80 ARS-BFGL-NGS-13518 13 Production 18791941 18791945 186292
81 ARS-BFGL-NGS-13518 13 Production 18822520 18822524 68549
82 ARS-BFGL-NGS-13518 13 Meat_and_Carcass 18840874 18840878 234091
83 ARS-BFGL-NGS-109707 13 Reproduction 18892496 18892500 15278
84 Hapmap30534-BTA-137081 23 Milk 14528139 14528143 164892
85 Hapmap30534-BTA-137081 23 Reproduction 14532015 14532019 28651
86 ARS-BFGL-NGS-118806 24 Reproduction 28703205 28703209 138597
87 ARS-BFGL-NGS-118806 24 Health 28750898 28750902 170376
88 ARS-BFGL-NGS-118806 24 Milk 28758507 28758511 180615
89 BTB-00952622 27 Milk 16616482 16616486 70382
90 BTB-00952622 27 Milk 16617996 16618000 70383
91 BTB-00952622 27 Milk 16669779 16669783 70769

FOR QTLs - UNIQUE

X SNP CHR QTL_type start_pos end_pos QTL_ID
1 ARS-BFGL-NGS-43721 2 Production 115986083 115986087 39013
2 ARS-BFGL-NGS-107330 2 Production 115986083 115986087 39013
3 ARS-BFGL-NGS-43721 2 Reproduction 115986083 115986087 39014
4 ARS-BFGL-NGS-107330 2 Reproduction 115986083 115986087 39014
5 ARS-BFGL-NGS-43721 2 Exterior 115986083 115986087 39015
6 ARS-BFGL-NGS-107330 2 Exterior 115986083 115986087 39015
7 ARS-BFGL-NGS-43721 2 Exterior 115986083 115986087 39016
8 ARS-BFGL-NGS-107330 2 Exterior 115986083 115986087 39016
9 ARS-BFGL-NGS-43721 2 Milk 115986083 115986087 39017
10 ARS-BFGL-NGS-107330 2 Milk 115986083 115986087 39017
11 ARS-BFGL-NGS-43721 2 Production 115986083 115986087 39018
12 ARS-BFGL-NGS-107330 2 Production 115986083 115986087 39018
13 ARS-BFGL-NGS-43721 2 Exterior 115986083 115986087 39019
14 ARS-BFGL-NGS-107330 2 Exterior 115986083 115986087 39019
15 ARS-BFGL-NGS-43721 2 Milk 115986083 115986087 39020
16 ARS-BFGL-NGS-107330 2 Milk 115986083 115986087 39020
17 ARS-BFGL-NGS-43721 2 Production 115986083 115986087 39021
18 ARS-BFGL-NGS-107330 2 Production 115986083 115986087 39021
19 ARS-BFGL-NGS-43721 2 Production 115986083 115986087 39022
20 ARS-BFGL-NGS-107330 2 Production 115986083 115986087 39022
21 ARS-BFGL-NGS-43721 2 Milk 115986083 115986087 39023
22 ARS-BFGL-NGS-107330 2 Milk 115986083 115986087 39023
23 ARS-BFGL-NGS-43721 2 Milk 115986083 115986087 39024
24 ARS-BFGL-NGS-107330 2 Milk 115986083 115986087 39024
25 ARS-BFGL-NGS-43721 2 Exterior 115986083 115986087 39025
26 ARS-BFGL-NGS-107330 2 Exterior 115986083 115986087 39025
27 ARS-BFGL-NGS-43721 2 Exterior 115986083 115986087 39026
28 ARS-BFGL-NGS-107330 2 Exterior 115986083 115986087 39026
29 ARS-BFGL-NGS-43721 2 Reproduction 115986083 115986087 39027
30 ARS-BFGL-NGS-107330 2 Reproduction 115986083 115986087 39027
31 ARS-BFGL-NGS-43721 2 Exterior 115986083 115986087 39028
32 ARS-BFGL-NGS-107330 2 Exterior 115986083 115986087 39028
33 BTB-01718894 6 Meat_and_Carcass 9223776 9223780 233635
34 BTA-25900-no-rs 13 Reproduction 18489668 18489672 46823
35 BTA-25900-no-rs 13 Exterior 18489668 18489672 46824
36 BTA-25900-no-rs 13 Exterior 18489668 18489672 46825
37 BTA-25900-no-rs 13 Milk 18489668 18489672 46826
38 BTA-25900-no-rs 13 Production 18489668 18489672 46827
39 BTA-25900-no-rs 13 Exterior 18489668 18489672 46828
40 BTA-25900-no-rs 13 Exterior 18489668 18489672 46829
41 BTA-25900-no-rs 13 Production 18489668 18489672 46830
42 BTA-25900-no-rs 13 Production 18489668 18489672 46831
43 BTA-25900-no-rs 13 Exterior 18489668 18489672 46832
44 BTA-25900-no-rs 13 Exterior 18489668 18489672 46833
45 BTA-25900-no-rs 13 Reproduction 18489668 18489672 46834
46 BTA-25900-no-rs 13 Reproduction 18489668 18489672 46835
47 BTA-25900-no-rs 13 Exterior 18489668 18489672 46836
48 BTA-25900-no-rs 13 Reproduction 18509810 18509814 46837
49 BTA-25900-no-rs 13 Reproduction 18509810 18509814 46838
50 BTA-25900-no-rs 13 Exterior 18509810 18509814 46839
51 BTA-25900-no-rs 13 Exterior 18509810 18509814 46840
52 BTA-25900-no-rs 13 Production 18509810 18509814 46841
53 BTA-25900-no-rs 13 Exterior 18509810 18509814 46842
54 BTA-25900-no-rs 13 Exterior 18509810 18509814 46843
55 BTA-25900-no-rs 13 Production 18509810 18509814 46844
56 BTA-25900-no-rs 13 Production 18509810 18509814 46845
57 BTA-25900-no-rs 13 Exterior 18509810 18509814 46846
58 BTA-25900-no-rs 13 Exterior 18509810 18509814 46847
59 BTA-25900-no-rs 13 Exterior 18509810 18509814 46848
60 BTA-25900-no-rs 13 Exterior 18509810 18509814 46849
61 BTA-25900-no-rs 13 Reproduction 18509810 18509814 46850
62 BTA-25900-no-rs 13 Health 18509810 18509814 46851
63 BTA-25900-no-rs 13 Reproduction 18509810 18509814 46852
64 BTA-25900-no-rs 13 Exterior 18509810 18509814 46853
65 BTA-25900-no-rs 13 Reproduction 18558538 18558542 46854
66 BTA-25900-no-rs 13 Exterior 18558538 18558542 46855
67 BTA-25900-no-rs 13 Exterior 18558538 18558542 46856
68 BTA-25900-no-rs 13 Milk 18558538 18558542 46857
69 BTA-25900-no-rs 13 Exterior 18558538 18558542 46858
70 BTA-25900-no-rs 13 Milk 18558538 18558542 46859
71 BTA-25900-no-rs 13 Production 18558538 18558542 46860
72 BTA-25900-no-rs 13 Production 18558538 18558542 46861
73 BTA-25900-no-rs 13 Milk 18558538 18558542 46862
74 BTA-25900-no-rs 13 Exterior 18558538 18558542 46863
75 BTA-25900-no-rs 13 Exterior 18558538 18558542 46864
76 BTA-25900-no-rs 13 Reproduction 18558538 18558542 46865
77 BTA-25900-no-rs 13 Reproduction 18558538 18558542 46866
78 BTA-25900-no-rs 13 Exterior 18558538 18558542 46867
79 BTA-25900-no-rs 13 Exterior 18558538 18558542 46868
80 ARS-BFGL-NGS-13518 13 Production 18791941 18791945 186292
81 ARS-BFGL-NGS-13518 13 Production 18822520 18822524 68549
82 ARS-BFGL-NGS-13518 13 Meat_and_Carcass 18840874 18840878 234091
83 ARS-BFGL-NGS-109707 13 Reproduction 18892496 18892500 15278
84 Hapmap30534-BTA-137081 23 Milk 14528139 14528143 164892
85 Hapmap30534-BTA-137081 23 Reproduction 14532015 14532019 28651
86 ARS-BFGL-NGS-118806 24 Reproduction 28703205 28703209 138597
87 ARS-BFGL-NGS-118806 24 Health 28750898 28750902 170376
88 ARS-BFGL-NGS-118806 24 Milk 28758507 28758511 180615
89 BTB-00952622 27 Milk 16616482 16616486 70382
90 BTB-00952622 27 Milk 16617996 16618000 70383
91 BTB-00952622 27 Milk 16669779 16669783 70769
92 ARS-BFGL-NGS-58358 27 Milk 16669779 16669783 70769

QTL Plots okok

# Set working directory
setwd("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/lowest_snp_pvalue_per_window")


#QTL type plot
oldpar <- par(mar=c(5,15,3,1))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1.5)

#QTL names plot (by type)

# Set the width and height in pixels for 600 DPI
png("qtl_names_w05.png", width=5100, height=6600, res=600)  # Width and height in pixels
# Set layout for multiple plots
par(mfrow=c(6, 1), mar=c(2, 20, 1, 1))
# List of QTL classes for titles
qtl_classes <- c("Production", "Reproduction", "Milk", "Meat_and_Carcass", "Health", "Exterior")
# Loop through each QTL class and plot
for (qtl_class in qtl_classes) {
  plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class=qtl_class)
  title(main=qtl_class)  # Add the QTL class as the main title for each plot
}
# Close the device
dev.off()


#QTL enrichment analysis 

out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]
write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/lowest_snp_pvalue_per_window/out_enrich_qtl_genome_name_w05.csv")


out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/lowest_snp_pvalue_per_window/out_enrich_qtl_genome_type_w05.csv")


#Plots

#Name
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL, out.enrich_qtl_name$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL, out.enrich_qtl_type$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL type My Image

QTL name by type My Image

12.0.0.1 Windows - QTL enrichment on GALLO

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
19 Rear leg placement - rear view 4 491 75 163224 0.0000830 0.0011623 Exterior
20 Rear leg placement - side view 4 430 75 163224 0.0000498 0.0011623 Exterior
7 Feet and leg conformation 4 627 75 163224 0.0002112 0.0019216 Exterior
8 Foot angle 4 672 75 163224 0.0002745 0.0019216 Exterior
4 Calving ease 8 3819 75 163224 0.0003739 0.0020939 Reproduction
24 Teat placement - front 3 327 75 163224 0.0004834 0.0022560 Exterior
17 Net merit 4 903 75 163224 0.0008283 0.0033132 Production
14 Milk potassium content 3 570 75 163224 0.0023734 0.0083071 Milk
18 PTA type 3 627 75 163224 0.0031021 0.0093633 Production
22 Stillbirth 4 1363 75 163224 0.0036784 0.0093633 Reproduction
26 Udder attachment 3 655 75 163224 0.0035051 0.0093633 Exterior
27 Udder depth 3 695 75 163224 0.0041339 0.0096458 Exterior
11 Length of productive life 4 2004 75 163224 0.0138268 0.0297809 Production
23 Strength 2 664 75 163224 0.0377215 0.0754430 Exterior
6 Fecal larva count 1 125 75 163224 0.0558511 0.1042555 Health
10 Interval to first estrus after calving 2 1053 75 163224 0.0848080 0.1484141 Reproduction
25 Teat placement - rear 1 298 75 163224 0.1281036 0.2109941 Exterior
9 Interval from first to last insemination 1 445 75 163224 0.1851892 0.2880721 Reproduction
28 Udder height 1 504 75 163224 0.2070495 0.3051257 Exterior
2 Body depth 1 616 75 163224 0.2469672 0.3457541 Production
5 Connective tissue amount 2 3142 75 163224 0.4246665 0.5169853 Meat and Carcass
16 Milk protein yield 2 3093 75 163224 0.4168784 0.5169853 Milk
21 Somatic cell score 1 1122 75 163224 0.4039597 0.5169853 Health
1 Average daily gain 1 3140 75 163224 0.7671113 0.8591646 Production
12 Milk fat percentage 4 10941 75 163224 0.7485863 0.8591646 Milk
3 Body weight 1 4289 75 163224 0.8643358 0.9175509 Production
13 Milk fat yield 2 8220 75 163224 0.8967985 0.9175509 Milk
15 Milk protein percentage 2 8803 75 163224 0.9175509 0.9175509 Milk

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval
1 Exterior 29 9077 75 163224 0.0000000 0.0000000
2 Health 2 5889 75 163224 0.7581297 1.0000000
3 Meat and Carcass 2 18258 75 163224 0.9985731 1.0000000
4 Milk 13 75352 75 163224 1.0000000 1.0000000
5 Production 14 19640 75 163224 0.0622566 0.1867698
6 Reproduction 15 35008 75 163224 0.6642010 1.0000000

My Image

13 g:PROFILER - 0.5% Windows - BLUPF90+

This output are in this directory: /home/bambrozi/2_CORTISOL/GPROFILER/BLUPF90_GWAS/keep_all_pheno_windows/smallest_snp_pvalue_per_window

Bellow the data inserted in g:PROFILER My Image

Bellow we can find the output for g:Profiler online

My Image

source term_name term_id highlighted adjusted_p_value negative_log10_of_adjusted_p_value term_size query_size intersection_size effective_domain_size intersections
GO:MF galactosylgalactosylxylosylprotein 3-beta-glucuronosyltransferase activity GO:0015018 true 0.0416190 1.380709 2 6 1 14414 ENSBTAG00000020817
GO:MF gamma-catenin binding GO:0045295 true 0.0484545 1.314666 11 6 1 14414 ENSBTAG00000021190
GO:MF complement component C3b binding GO:0001851 true 0.0484545 1.314666 5 6 1 14414 ENSBTAG00000023177
GO:MF complement binding GO:0001848 false 0.0484545 1.314666 13 6 1 14414 ENSBTAG00000023177
GO:MF alpha-catenin binding GO:0045294 true 0.0484545 1.314666 10 6 1 14414 ENSBTAG00000021190
GO:MF opsonin binding GO:0001846 false 0.0484545 1.314666 14 6 1 14414 ENSBTAG00000023177
GO:CC adherens junction GO:0005912 true 0.0481595 1.317318 103 7 2 17218 ENSBTAG00000014991,ENSBTAG00000021190
GO:CC PAR polarity complex GO:0120157 true 0.0481595 1.317318 3 7 1 17218 ENSBTAG00000014991
REAC Cell-cell junction organization REAC:R-BTA-421270 false 0.0053628 2.270610 48 4 2 8225 ENSBTAG00000014991,ENSBTAG00000021190
REAC Cell junction organization REAC:R-BTA-446728 false 0.0058851 2.230249 71 4 2 8225 ENSBTAG00000014991,ENSBTAG00000021190
REAC Cell-Cell communication REAC:R-BTA-1500931 false 0.0064442 2.190829 91 4 2 8225 ENSBTAG00000014991,ENSBTAG00000021190
REAC Tight junction interactions REAC:R-BTA-420029 false 0.0196781 1.706017 6 4 1 8225 ENSBTAG00000014991
REAC TGF-beta receptor signaling in EMT (epithelial to mesenchymal transition) REAC:R-BTA-2173791 false 0.0392916 1.405700 15 4 1 8225 ENSBTAG00000014991
REAC Myogenesis REAC:R-BTA-525793 false 0.0479618 1.319105 22 4 1 8225 ENSBTAG00000021190
WP EBV LMP1 signaling WP:WP984 false 0.0317587 1.498137 13 2 1 2450 ENSBTAG00000021326
WP Complement and coagulation cascades WP:WP1056 false 0.0377267 1.423351 31 2 1 2450 ENSBTAG00000023177
HP Interhypothalamic adhesion HP:0033105 false 0.0446133 1.350536 1 1 1 5133 ENSBTAG00000021190

I used the code bellow to bring the gene name to g:PROFILER output

out_genes <- read.csv("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/lowest_snp_pvalue_per_window/out_genes.csv")

out_gprofiler <- read.csv("/home/bambrozi/2_CORTISOL/GPROFILER/BLUPF90_GWAS/keep_all_pheno_windows/smallest_snp_pvalue_per_window/gProfiler_btaurus_2025-02-14_11-00-34 a.m.__intersections.csv")

# Supondo que out_gprofiler seja o seu data frame e intersections a coluna que contém as IDs separadas por vírgula
library(tidyr)

# Dividir a coluna 'intersections' em duas novas colunas, por exemplo 'ID_1' e 'ID_2'
out_gprofiler <- out_gprofiler %>%
  separate(intersections, into = c("ID_1", "ID_2"), sep = ",")

# Visualize o resultado
print(out_gprofiler)


gene_list <- out_genes[, c("gene_id", "gene_name")]

gene_list <- gene_list[!duplicated(gene_list$gene_id), ]

library(dplyr)

# Supondo que out_gprofiler tenha a coluna 'ID_1' e gene_list tenha as colunas 'gene_id' e 'gene_name'

out_gprofiler <- out_gprofiler %>%
  left_join(gene_list, by = c("ID_1" = "gene_id"))

colnames(out_gprofiler)

# Rename 'gene_name' to 'gene_name_1'
colnames(out_gprofiler)[colnames(out_gprofiler) == "gene_name"] <- "gene_name_1"

out_gprofiler <- out_gprofiler %>%
  left_join(gene_list, by = c("ID_2" = "gene_id"))

# Rename 'gene_name' to 'gene_name_1'
colnames(out_gprofiler)[colnames(out_gprofiler) == "gene_name"] <- "gene_name_2"

write.csv(out_gprofiler,
          "/home/bambrozi/2_CORTISOL/GPROFILER/BLUPF90_GWAS/keep_all_pheno_windows/smallest_snp_pvalue_per_window/gProfiler_btaurus_2025-02-14_11-00-34 a.m.__intersections_gene_name.csv")
X source term_name term_id highlighted adjusted_p_value negative_log10_of_adjusted_p_value term_size query_size intersection_size effective_domain_size ID_1 ID_2 gene_name_1 gene_name_2
1 GO:MF galactosylgalactosylxylosylprotein 3-beta-glucuronosyltransferase activity GO:0015018 true 0.0416190 1.380709 2 6 1 14414 ENSBTAG00000020817 NA B3GAT2 NA
2 GO:MF gamma-catenin binding GO:0045295 true 0.0484545 1.314666 11 6 1 14414 ENSBTAG00000021190 NA CDH2 NA
3 GO:MF complement component C3b binding GO:0001851 true 0.0484545 1.314666 5 6 1 14414 ENSBTAG00000023177 NA CFH NA
4 GO:MF complement binding GO:0001848 false 0.0484545 1.314666 13 6 1 14414 ENSBTAG00000023177 NA CFH NA
5 GO:MF alpha-catenin binding GO:0045294 true 0.0484545 1.314666 10 6 1 14414 ENSBTAG00000021190 NA CDH2 NA
6 GO:MF opsonin binding GO:0001846 false 0.0484545 1.314666 14 6 1 14414 ENSBTAG00000023177 NA CFH NA
7 GO:CC adherens junction GO:0005912 true 0.0481595 1.317318 103 7 2 17218 ENSBTAG00000014991 ENSBTAG00000021190 PARD3 CDH2
8 GO:CC PAR polarity complex GO:0120157 true 0.0481595 1.317318 3 7 1 17218 ENSBTAG00000014991 NA PARD3 NA
9 REAC Cell-cell junction organization REAC:R-BTA-421270 false 0.0053628 2.270610 48 4 2 8225 ENSBTAG00000014991 ENSBTAG00000021190 PARD3 CDH2
10 REAC Cell junction organization REAC:R-BTA-446728 false 0.0058851 2.230249 71 4 2 8225 ENSBTAG00000014991 ENSBTAG00000021190 PARD3 CDH2
11 REAC Cell-Cell communication REAC:R-BTA-1500931 false 0.0064442 2.190829 91 4 2 8225 ENSBTAG00000014991 ENSBTAG00000021190 PARD3 CDH2
12 REAC Tight junction interactions REAC:R-BTA-420029 false 0.0196781 1.706017 6 4 1 8225 ENSBTAG00000014991 NA PARD3 NA
13 REAC TGF-beta receptor signaling in EMT (epithelial to mesenchymal transition) REAC:R-BTA-2173791 false 0.0392916 1.405700 15 4 1 8225 ENSBTAG00000014991 NA PARD3 NA
14 REAC Myogenesis REAC:R-BTA-525793 false 0.0479618 1.319105 22 4 1 8225 ENSBTAG00000021190 NA CDH2 NA
15 WP EBV LMP1 signaling WP:WP984 false 0.0317587 1.498137 13 2 1 2450 ENSBTAG00000021326 NA CCL20 NA
16 WP Complement and coagulation cascades WP:WP1056 false 0.0377267 1.423351 31 2 1 2450 ENSBTAG00000023177 NA CFH NA
17 HP Interhypothalamic adhesion HP:0033105 false 0.0446133 1.350536 1 1 1 5133 ENSBTAG00000021190 NA CDH2 NA

14 Heritability estimation - BLUPF90

14.1 Files preparation

Preparing files to run Variance components estimation using REML with AI (Average Information) algorithm.

First you need to create a directory in your home directory, prepare and save the following files in:

  • Phenotype and Fixed effects file
  • Pedigree file
  • Genotype file
  • BlupF90+ executable file
  • RenumF90 executable file
  • Parameter file

      14.1.1 Phenotype and Fixed effects file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Phenotype THIRD COLUMN = Fixed Effect 1 FOURTH COLUMN = Fixed Effect 2

      To get in one file these four columns we need the following code:

      #File with equivalence among different ids
      eq_ids <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
      
      # Genotype file with cid
      geno <- read.table("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
      
      # Phenotipic file and fixed effects
      data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv")
      
      # creating a pheno file with only ID, Cortisol, BY and Sam date columns
      pheno <- data_final %>%
        select(ID, T4Cortisol, BIRTH_YEAR, Sampling_date)
      
      # Create a new column iid and and bring the iid from eq_ids to geno file
      pheno$iid <- eq_ids$iid[match(pheno$ID, eq_ids$elora_id)]
      
      # organizing columns sequence and keep only iid
      pheno <- pheno%>%
        select(iid, T4Cortisol, BIRTH_YEAR, Sampling_date)
      
      # Create a new column geno$iid, and bring the iid from eq_ids to geno file
      geno$iid <- eq_ids$iid[match(geno$V2, eq_ids$cdn_id)]
      
      # organizing the columns sequence
      library(dplyr)
      geno <- geno %>%
        select(V1, V2, iid, everything())
      
      # Keeping in the pheno file only the rows present also in geno file
      pheno <- pheno %>%
        filter(iid %in% geno$iid)
      
      write.table(pheno, "/home/bambrozi/2_CORTISOL/Heritability_BLUPF90/pheno_fix_eff.txt", sep = " ", col.names = FALSE, row.names = FALSE, quote = FALSE)

      The file should be saved as text file, with separation by space and no columns names.

      14.1.2 Pedigree file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Sire ID THIRD COLUMN = Dam ID

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to remove the commas of a .csv file to a file with sepation by spaces.

      # to replace comma for space in the .csv file with the equivalence among IDs
      sed -i 's/,/ /g' bruno_ids.csv

      14.1.3 Genotype file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Genotypes (0, 1 and 2 format)

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to replace the cid for iid. First we merge using the second column of the firs file, and the first column of the second file. Then we use again the command awk to keep only the third and fifth columsn and sabe in a different object.

      # Using the awk function to merge the two files and the second awk to select only the 3rd and 5fh columns
      awk 'FNR==NR {a[$2]=$0; next} {print a[$1], $0}' bruno_ids.csv bruno_gntps.txt | awk '{print$3,$5}' > bruno_gntps_iid

      14.1.4 Download the executable files

      Download from this website https://nce.ads.uga.edu/html/projects/programs/Linux/64bit/:
      • BlupF90+
      • renumF90

      14.1.5 Parameter file

      The appearance of this file is like this:

      My Image

      • DATAFILE: bellow this line you need to inform the name of the file with phenotype and fixed effects. As before running BLUPF90 on server you are going to direct the terminal to the directory where all these files are placed you only need to inform the name.
      • TRAITS: below this line you need to inform which column are the phenotype date in the previous file, in this example, 2.
      • FIELDS_PASSED TO OUTPUT:
      • WEIGHT (S):
      • RESIDUAL VARIANCE: for the firs run you need to inform the value of 1.0, for the second you can pick the variance from the firs run’s output.
      • EFFECT: you will inform your first effect, in this example, Birth Year, which is in the column 3, and the word cross numer because is a number.
      • EFFECT: you should provide the next effect, in this example, sample date, as sample date has one non numeric character you should inform as cross alpha, in this example column 4.
      • EFFECT: now I’m providing my animal ID information, in this example column 1, and again cross alpha because has number and letters in the ID. I’m also informing that this effect is RANDOM, and that is my animal effect.
      • FILE: bellow this line I need to provide the pedigree file. Again, as I’m already in the directory which contain the pedigree file I only need to provide the file name.
      • FILE-POS: Here I’ll inform which columns should be considered in the pedigree file, in this situation, 1 2 3 0 0.
      • PED_DEPTH: Now we can inform the depth we want the software considers the pedigree, or if we leave 0 it will the maximum possible.
      • (CO) VARIANCES: Here you should provide the Variance/Co-variance matrix, like as for residual variance in the first run we set up to 0 in this example that we don´t have to imagine any co-variance, but if you know that exist variance among you effects you shoul set up XXX for ….
      • OPTION method: VCE (Variance Component Estimation).
      • OPTION OrigID: this will keep the original ID informed.
      • OPTION missing 9999: you are informing that missing values will appear as 9999
      • OPTION se_covar_function: H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
        • H2_1: the name that your function will appear on the output files.
        • g_3_3: you are asking for genetic variance estimation for the 3rd informed effect.
        • **_1_1**: this effect is in the 1st column.
        • /(g_3_3_1_1+r_1_1): to get the total phenotipic variance, you are summing to genetic variance the residual variance of the effect in column 1.

14.2 Running renumF90 and BlupF90+

  1. Go to the server you wanna run this analysis, for instance, grand

  2. Now go to the directory you have created to run this analysis where that set of files are placed.

ssh grand
  1. Make the renumF90 and BlupF90+ files executables
chmod +x renumf90
chmod +x blupf90+
  1. Run renumF90
./renumf90

When you run the code above, it will as you the name of your parameter card.

renumF90 will generate a new parameter card called renf90.par

  1. Run blupf90+
./blupf90+

blupf90+ will ask you for parameter’s card name, now you should provide with the new one renf90.par

blupF90+ will generate the blupf90.log file with the results.

My Image

Now you should update you renf90.par file with these informations from the .log file

Copy Residual Variance from blupf90.log and will paste on renf90.par RANDOM_RESIDUAL_VALUES Copy Genetic variance for effect x from blupf90.log and will paste on renf90.par (CO) VARIANCE

  1. 2nd blupf90+ run
./blupf90+

blupf90+ will ask you for parameter’s card name, now you should provide with the UPDATED renf90.par

If the Residual Variance and Genetic variance for effect x didn’t change in your blupf90.log the analysis ended, but if this value vary, you should update again the renf90.par and run again blupf90+ until this values don’t change more.

14.3 Running renumF90 and BlupF90+ adding GENOTYPES

The previous analysis considered only the pedigree, but now we can insert the genotype information. To perform this you need a new diretory called Blup_Genomic inside your previously created directory.

Now you need add the reference for your genotype file in your previous parameter file renum.par and save in this new sub-directory.

The highlighted text show the added part.

My Image

Go to the sub-diretory

Note that as you are in the subdirectory, but your phenotype and fixed effect, pedigree and genotype files are still in the previous directory you need to add the highlighted part to inform the correct location

My Image

To run the renumf90 and blupf90+ you also need to add ../ to correct specify the location. Run renumF90

./../renumf90

Run blupf90+

./../blupf90+

The steps for run, update parameter card, re-run are the same.

14.4 Results

We have 2 different output files

  1. Variance components: blupf90.log

My Image

In this file we can find the heritabilit (SD) and for instance the convergence (similarity)

  1. Solutions: solutions.orig
    In this file we will find the solutions (results) for each effect

In our example:

  • EFFECT 1: Birth Year, has 4 levels (2018, 2019, 2020 and 2021), and the solution that for this fixed effect is how much each level add.
  • EFFECT 2: Sampling date, has 23 levels, and the solutions
  • EFFECT 3: Animal random effect, has one for each animal and it is the EBV or GEBV.

    My Image

15 Additional analysis

15.0.1 GEBVs Average and SD

I calculated the average and SD for the GEBVs

GEBVs <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/solutions.orig", header = T)

equiv_id <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")

mer <- merge(equiv_id, GEBVs, by.x = "iid", by.y = "original_id")

cortisol  <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")

output <- merge(cortisol, mer, by.x = "ID", by.y = "elora_id")

colnames(output)

output <- output[, c("ID", "iid", "cdn_id", "T4Cortisol", "solution")]

mean_solution <- mean(output$solution, na.rm = TRUE)
sd_solution <- sd(output$solution, na.rm = TRUE)

mean_solution
sd_solution

library(ggplot2)

# Create histogram with density curve, mean, and SD lines
ggplot(output, aes(x = solution)) +
  geom_histogram(aes(y = ..density..), fill = "lightblue", color = "black", bins = 30, alpha = 0.6) +
  geom_density(color = "darkred", size = 1) +  # Density curve
  geom_vline(aes(xintercept = mean_solution), color = "red", linetype = "dashed", size = 1) +
  geom_vline(aes(xintercept = mean_solution + sd_solution), color = "blue", linetype = "dashed", size = 1) +
  geom_vline(aes(xintercept = mean_solution - sd_solution), color = "blue", linetype = "dashed", size = 1) +
  labs(title = "Histogram and Density Curve of GEBVs", x = "Solution", y = "Density") +
  theme_minimal()


# Subset for values BELOW 1 SD from the mean
output_low <- output %>% 
  filter(solution < (mean_solution - sd_solution))

# Subset for values ABOVE 1 SD from the mean
output_high <- output %>% 
  filter(solution > (mean_solution + sd_solution))

# Subset for values WITHIN 1 SD from the mean (intermediary)
output_intermediary <- output %>% 
  filter(solution >= (mean_solution - sd_solution) & solution <= (mean_solution + sd_solution))

write.csv(output, "/home/bambrozi/2_CORTISOL/GEBVs_subsets/all.csv")
write.csv(output_low, "/home/bambrozi/2_CORTISOL/GEBVs_subsets/low.csv")
write.csv(output_high, "/home/bambrozi/2_CORTISOL/GEBVs_subsets/high.csv")
write.csv(output_intermediary, "/home/bambrozi/2_CORTISOL/GEBVs_subsets/intermediary.csv")

mean_solution <- round(mean(output$solution, na.rm = TRUE), 2)
sd_solution <- round(sd(output$solution, na.rm = TRUE), 2)


output$Category <- ifelse(output$T4Cortisol >= 956, "H", 
                            ifelse(output_low <= 190.8, "L", "M"))

# Create the plot
ggplot(output, aes(x = T4Cortisol, y = solution)) +
  geom_point(size = 2, color = "black") +  # Smaller scatter plot points
  labs(x = "T4 Cortisol (pg/mL)", y = "GEBV", title = "T4 Cortisol vs GEBV") +
  geom_hline(yintercept = mean_solution, color = "blue", linetype = "dashed", size = 1) +  # Mean line
  geom_hline(yintercept = mean_solution + sd_solution, color = "red", linetype = "dashed", size = 1) +  # +1SD line
  geom_hline(yintercept = mean_solution - sd_solution, color = "red", linetype = "dashed", size = 1) +  # -1SD line
  geom_text(aes(x = max(output$T4Cortisol), y = mean_solution, label = "Mean"), color = "blue", vjust = -0.5, hjust = 1) +  # Label for mean
  geom_text(aes(x = max(output$T4Cortisol), y = mean_solution + sd_solution, label = "+1SD"), color = "red", vjust = -0.5, hjust = 1) +  # Label for +1SD
  geom_text(aes(x = max(output$T4Cortisol), y = mean_solution - sd_solution, label = "-1SD"), color = "red", vjust = -0.5, hjust = 1) +  # Label for -1SD
  theme_minimal() +
  theme(
    axis.line = element_line(color = "black"), 
    panel.grid.major = element_line(color = "gray80", linewidth = 0.5), 
    panel.grid.minor = element_line(color = "gray90", linewidth = 0.25), 
    legend.title = element_text(size = 10),
    legend.position = "right"
  )

15.0.2 Age at sampling date

Merg_Cort_GEBVs <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/Merged_Cortisol_GEBVs.csv")

#Opening the file with the GEBVs columns to use
Columns_to_use <- readLines("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/traits_to_use.txt")

colnames(Merg_Cort_GEBVs)[405] <- "IDD"

data <- select(Merg_Cort_GEBVs, ID, T4Cortisol, BIRTH_YEAR, BIRTH_DATE, all_of(Columns_to_use))

samp_date2 <- read.csv("/home/bambrozi/2_CORTISOL/Data/Elora animal_ids_kl_sampling_date.csv")

# Convert Sampling_date to Date using as.Date
samp_date2$Sampling_date <- as.Date(samp_date2$Sampling_date, format = "%m/%d/%Y")

table(samp_date2$Sampling_date)

samp_date <- select(samp_date2, Elora_id, Sampling_date)

# Check if data$ID and samp_dates$elora_id are identical in values and order
identical(data$ID, samp_date$Elora_id)

data_final <- merge(data, samp_date, by.x="ID", by.y="Elora_id")

data_final <- data_final %>%
  select(ID, T4Cortisol, BIRTH_YEAR, BIRTH_DATE, Sampling_date)

library(lubridate)

data_final$BIRTH_DATE <- ymd(data_final$BIRTH_DATE)

# Calculate age in total days
data_final$Age_days <- as.numeric(difftime(data_final$Sampling_date, data_final$BIRTH_DATE, units = "days"))


# The data below has the the 55 GEBVs + Cortisol data + Birth Year + Sampling data
write.csv(data_final, "/home/bambrozi/2_CORTISOL/Data/data_age_samplingdate.csv")

# Calculate values
average_age <- mean(data_final$Age_days, na.rm = TRUE)
sd_age <- sd(data_final$Age_days, na.rm = TRUE)
median_age <- median(data_final$Age_days, na.rm = TRUE)
range_age <- range(data_final$Age_days, na.rm = TRUE)

# Create a data frame to save
age_summary <- data.frame(
  Metric = c("Mean", "Standard Deviation", "Median", "Min", "Max"),
  Value = c(average_age, sd_age, median_age, range_age[1], range_age[2])
)

# Save to a CSV file
write.csv(age_summary, "/home/bambrozi/2_CORTISOL/Data/age_summary.csv", row.names = FALSE)
Metric Value
Mean 244.86822
Standard Deviation 42.92195
Median 237.50000
Min 162.00000
Max 365.00000

15.0.3 Common enriched QTLs

qtl_pval <- read.csv("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_enrich_qtl_genome_name_pvalue.csv")
qtl_w05 <- read.csv("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_enrich_qtl_genome_name_w05.csv")
qtl_w01 <- read.csv("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_enrich_qtl_genome_name_w01.csv")

library(tidyverse)

qtl_pval <- filter(qtl_pval, adj.pval <= 0.05) 
qtl_w05 <- filter(qtl_w05, adj.pval <= 0.05) 
qtl_w01 <- filter(qtl_w01, adj.pval <= 0.05) 

qtl_pval$approach <- "pval"
qtl_w05$approach <- "w05"
qtl_w01$approach <- "w01"

qtl_all_approachs <- rbind(qtl_pval, qtl_w05, qtl_w01)

write.csv(qtl_all_approachs, "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/enrich_sig_qtl_all_approachs.csv")

qtl_names <- unique(qtl_all_approachs$QTL)

qtl_names <- sort(qtl_names)


# Create a matrix with the desired dimensions
matrix_length <- length(qtl_names)
common_enrich_qtl <- matrix(NA, nrow = matrix_length, ncol = 4)

# Optionally, name the rows and columns
colnames(common_enrich_qtl) <- c("QTL_names", "Single_SNP", "Windows_0.5", "Windows_0.1")

common_enrich_qtl[, "QTL_names"] <- qtl_names

common_enrich_qtl <- as.data.frame(common_enrich_qtl)

# Ensure both vectors have the same length (or handle if they don't)
common_enrich_qtl$Single_SNP <- ifelse(common_enrich_qtl$QTL_names %in% qtl_pval$QTL, "yes", NA)
common_enrich_qtl$Windows_0.5 <- ifelse(common_enrich_qtl$QTL_names %in% qtl_w05$QTL, "yes", NA)
common_enrich_qtl$Windows_0.1 <- ifelse(common_enrich_qtl$QTL_names %in% qtl_w01$QTL, "yes", NA)

write.csv(common_enrich_qtl, "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/common_enrich_qtl.csv")